diff --git a/arch/arm/mach-msm/include/mach/ppmu.h b/arch/arm/mach-msm/include/mach/ppmu.h new file mode 100644 index 00000000..684ce5cd --- /dev/null +++ b/arch/arm/mach-msm/include/mach/ppmu.h @@ -0,0 +1,122 @@ +/* linux/arch/arm/mach-exynos/include/mach/ppmu.h + * + * Copyright (c) 2010 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * EXYNOS4 - PPMU support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#ifndef __ASM_ARCH_PPMU_H +#define __ASM_ARCH_PPMU_H __FILE__ + +#define NUMBER_OF_COUNTER 4 + +#define PPMU_CNTENS 0x10 +#define PPMU_CNTENC 0x20 +#define PPMU_INTENS 0x30 +#define PPMU_INTENC 0x40 +#define PPMU_FLAG 0x50 + +#define PPMU_CCNT 0x100 +#define PPMU_PMCNT0 0x110 +#define PPMU_PMCNT_OFFSET 0x10 + +#define PPMU_BEVT0SEL 0x1000 +#define PPMU_BEVTSEL_OFFSET 0x100 +#define PPMU_CNT_RESET 0x1800 + +#define DEVT0_SEL 0x1000 +#define DEVT0_ID 0x1010 +#define DEVT0_IDMSK 0x1014 +#define DEVT_ID_OFFSET 0x100 + +#define DEFAULT_WEIGHT 1 + +#define MAX_CCNT 100 + +/* For flags */ +#define VIDEO_DOMAIN 0x00000001 +#define AUDIO_DOMAIN 0x00000002 +#define ALL_DOMAIN 0xffffffff + +/* For event */ +#define RD_DATA_COUNT 0x00000005 +#define WR_DATA_COUNT 0x00000006 +#define RDWR_DATA_COUNT 0x00000007 + +#define PMCNT_OFFSET(i) (PPMU_PMCNT0 + (PPMU_PMCNT_OFFSET * i)) + +enum ppmu_counter { + PPMU_PMNCNT0, + PPMU_PMCCNT1, + PPMU_PMNCNT2, + PPMU_PMNCNT3, + PPMU_PMNCNT_MAX, +}; + +enum ppmu_ch { + DMC0, + DMC1, +}; + +enum ppmu_type { + PPMU_MIF, + PPMU_INT, + PPMU_TYPE_END, +}; + +enum exynos4_ppmu { + PPMU_DMC0, + PPMU_DMC1, + PPMU_CPU, +#ifdef CONFIG_ARCH_EXYNOS5 + PPMU_DDR_C, + PPMU_DDR_R1, + PPMU_DDR_L, + PPMU_RIGHT0_BUS, +#endif + PPMU_END, +}; + +extern unsigned long long ppmu_load[PPMU_END]; +extern unsigned long long ppmu_load_detail[2][PPMU_END]; + +struct exynos4_ppmu_hw { + struct list_head node; + void __iomem *hw_base; + unsigned int ccnt; + unsigned int event[NUMBER_OF_COUNTER]; + unsigned int weight; + int usage; + int id; + unsigned int flags; + struct device *dev; + unsigned int count[NUMBER_OF_COUNTER]; +}; + +void exynos4_ppc_reset(struct exynos4_ppmu_hw *ppmu); +void exynos4_ppc_start(struct exynos4_ppmu_hw *ppmu); +void exynos4_ppc_stop(struct exynos4_ppmu_hw *ppmu); +void exynos4_ppc_setevent(struct exynos4_ppmu_hw *ppmu, + unsigned int evt_num); +unsigned long long exynos4_ppc_update(struct exynos4_ppmu_hw *ppmu); + +void exynos4_ppmu_reset(struct exynos4_ppmu_hw *ppmu); +void exynos4_ppmu_start(struct exynos4_ppmu_hw *ppmu); +void exynos4_ppmu_stop(struct exynos4_ppmu_hw *ppmu); +void exynos4_ppmu_setevent(struct exynos4_ppmu_hw *ppmu, + unsigned int evt_num); +unsigned long long exynos4_ppmu_update(struct exynos4_ppmu_hw *ppmu, int ch); + +void ppmu_init(struct exynos4_ppmu_hw *ppmu, struct device *dev); +void ppmu_start(struct device *dev); +void ppmu_update(struct device *dev, int ch); +void ppmu_reset(struct device *dev); + +extern struct exynos4_ppmu_hw exynos_ppmu[]; +#endif /* __ASM_ARCH_PPMU_H */ + diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 5bd1018c..5bbb4a78 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -50,63 +50,47 @@ choice This option sets which CPUFreq governor shall be loaded at startup. If in doubt, select 'performance'. 
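The new mach/ppmu.h header above only exposes raw register offsets plus the exynos4_ppmu_hw handle, so a consumer composes them itself. A minimal read helper might look like the sketch below; the helper name is hypothetical and it assumes hw_base has already been ioremap()'d by whatever platform code fills in the exynos4_ppmu_hw entry.

#include <linux/io.h>
#include <mach/ppmu.h>

/* Hypothetical helper: read the i-th event counter of one PPMU instance.
 * Assumes ppmu->hw_base is already mapped. */
static u32 ppmu_read_pmcnt(struct exynos4_ppmu_hw *ppmu, int i)
{
	if (i >= NUMBER_OF_COUNTER)
		return 0;
	/* PMCNT_OFFSET(i) expands to PPMU_PMCNT0 + i * PPMU_PMCNT_OFFSET */
	return __raw_readl(ppmu->hw_base + PMCNT_OFFSET(i));
}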
-config CPU_FREQ_DEFAULT_GOV_PERFORMANCE - bool "performance" +config CPU_FREQ_DEFAULT_GOV_ABYSSPLUG + bool "abyssplug" + select CPU_FREQ_GOV_ABYSSPLUG select CPU_FREQ_GOV_PERFORMANCE + ---help--- + Use the CPUFreq governor 'abyssplug' as default. This allows you + to get a full dynamic frequency capable system with CPU + hotplug support by simply loading your cpufreq low-level + hardware driver. Be aware that not all cpufreq drivers + support the hotplug governor. If unsure have a look at + the help section of the driver. Fallback governor will be the + performance governor. + +config CPU_FREQ_DEFAULT_GOV_ADAPTIVE + bool "adaptive" + select CPU_FREQ_GOV_ADAPTIVE help - Use the CPUFreq governor 'performance' as default. This sets - the frequency statically to the highest frequency supported by - the CPU. - -config CPU_FREQ_DEFAULT_GOV_POWERSAVE - bool "powersave" - depends on EXPERT - select CPU_FREQ_GOV_POWERSAVE - help - Use the CPUFreq governor 'powersave' as default. This sets - the frequency statically to the lowest frequency supported by - the CPU. + Use the CPUFreq governor 'adaptive' as default. This allows + you to get a full dynamic cpu frequency capable system by simply + loading your cpufreq low-level hardware driver, using the + 'adaptive' governor for latency-sensitive workloads and demanding + performance. -config CPU_FREQ_DEFAULT_GOV_USERSPACE - bool "userspace" - select CPU_FREQ_GOV_USERSPACE - help - Use the CPUFreq governor 'userspace' as default. This allows - you to set the CPU frequency manually or when a userspace - program shall be able to set the CPU dynamically without having - to enable the userspace governor manually. +config CPU_FREQ_DEFAULT_GOV_ASSWAX + bool "asswax" + select CPU_FREQ_GOV_ASSWAX + help + Use as default governor -config CPU_FREQ_DEFAULT_GOV_ONDEMAND - bool "ondemand" - select CPU_FREQ_GOV_ONDEMAND +config CPU_FREQ_DEFAULT_GOV_BADASS + bool "badass" + select CPU_FREQ_GOV_BADASS select CPU_FREQ_GOV_PERFORMANCE help - Use the CPUFreq governor 'ondemand' as default. This allows + Use the CPUFreq governor 'badass' as default. This allows you to get a full dynamic frequency capable system by simply loading your cpufreq low-level hardware driver. - Be aware that not all cpufreq drivers support the ondemand + Be aware that not all cpufreq drivers support the badass governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. -config CPU_FREQ_DEFAULT_GOV_BADASS - bool "badass" - select CPU_FREQ_GOV_BADASS - help - 'badass' - This driver adds a dynamic cpufreq policy governor. - The governor does a periodic polling and - changes frequency based on the CPU utilization. - The support for this governor depends on CPU capability to - do fast frequency switching (i.e, very low latency frequency - transitions). - - If in doubt, say N - -config CPU_FREQ_DEFAULT_GOV_SMARTASSV2 - bool "badass" - select CPU_FREQ_GOV_SMARTASSV2 - help - Use the CPUFreq governor 'smartassv2' as default. - config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE bool "conservative" select CPU_FREQ_GOV_CONSERVATIVE @@ -119,6 +103,36 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. 
+config CPU_FREQ_DEFAULT_GOV_SMARTASSV2 + bool "smartassv2" + select CPU_FREQ_GOV_SMARTASSV2 + help + +config CPU_FREQ_DEFAULT_GOV_SAVAGEDZEN + bool "savagedzen" + select CPU_FREQ_GOV_SAVAGEDZEN + help + +config CPU_FREQ_DEFAULT_GOV_LULZACTIVE + bool "lulzactive" + select CPU_FREQ_GOV_LULZACTIVE + help + +config CPU_FREQ_DEFAULT_GOV_MINMAX + bool "minmax" + select CPU_FREQ_GOV_MINMAX + help + +config CPU_FREQ_DEFAULT_GOV_LIONHEART + bool "lionheart" + select CPU_FREQ_GOV_LIONHEART + help + +config CPU_FREQ_DEFAULT_GOV_DANCEDANCE + bool "dancedance" + select CPU_FREQ_GOV_DANCEDANCE + help + config CPU_FREQ_DEFAULT_GOV_INTERACTIVE bool "interactive" select CPU_FREQ_GOV_INTERACTIVE @@ -128,132 +142,231 @@ config CPU_FREQ_DEFAULT_GOV_INTERACTIVE loading your cpufreq low-level hardware driver, using the 'interactive' governor for latency-sensitive workloads. +config CPU_FREQ_DEFAULT_GOV_INTELLIDEMAND + bool "intellidemand" + select CPU_FREQ_GOV_INTELLIDEMAND + help + Use the CPUFreq governor 'intellidemand' as default. This is + based on Ondemand with browsing detection based on GPU loading -config CPU_FREQ_DEFAULT_GOV_LIONHEART - bool "lionheart" - select CPU_FREQ_GOV_LIONHEART +config CPU_FREQ_DEFAULT_GOV_NIGHTMARE + bool "nightmare" + select CPU_FREQ_GOV_NIGHTMARE + help + +config CPU_FREQ_DEFAULT_GOV_ONDEMAND + bool "ondemand" + select CPU_FREQ_GOV_ONDEMAND + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'ondemand' as default. This allows + you to get a full dynamic frequency capable system by simply + loading your cpufreq low-level hardware driver. + Be aware that not all cpufreq drivers support the ondemand + governor. If unsure have a look at the help section of the + driver. Fallback governor will be the performance governor. + +config CPU_FREQ_DEFAULT_GOV_PEGASUSQ + bool "pegasusq" + select CPU_FREQ_GOV_PEGASUSQ help - Use the CPUFreq governor 'lionheart' as default + Use the CPUFreq governor 'pegasusq' as default. -config CPU_FREQ_DEFAULT_GOV_INTELLIDEMAND +config CPU_FREQ_DEFAULT_GOV_SLP + bool "slp" + select CPU_FREQ_GOV_SLP + help + Use the CPUFreq governor 'slp' as default. - bool "intellidemand" - select CPU_FREQ_GOV_INTELLIDEMAND +config CPU_FREQ_DEFAULT_GOV_PERFORMANCE + bool "performance" + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'performance' as default. This sets + the frequency statically to the highest frequency supported by + the CPU. + +config CPU_FREQ_DEFAULT_GOV_POWERSAVE + bool "powersave" + depends on EXPERT + select CPU_FREQ_GOV_POWERSAVE + help + Use the CPUFreq governor 'powersave' as default. This sets + the frequency statically to the lowest frequency supported by + the CPU. + +config CPU_FREQ_DEFAULT_GOV_SMARTASSH3 + bool "smartassH3" + select CPU_FREQ_GOV_SMARTASSH3 + help + Use the CPUFreq governor 'slp' as default. + +config CPU_FREQ_DEFAULT_GOV_USERSPACE + bool "userspace" + select CPU_FREQ_GOV_USERSPACE help - Use the CPUFreq governor 'intellidemand' as default. + Use the CPUFreq governor 'userspace' as default. This allows + you to set the CPU frequency manually or when a userspace + program shall be able to set the CPU dynamically without having + to enable the userspace governor manually. +config CPU_FREQ_DEFAULT_GOV_WHEATLEY + bool "wheatley" + select CPU_FREQ_GOV_WHEATLEY + select CPU_FREQ_GOV_PERFORMANCE + ---help--- + Use the CPUFreq governor 'wheatley' as default. 
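Every CPU_FREQ_DEFAULT_GOV_* entry in this choice is only consumed at build time through include/linux/cpufreq.h, which this diff does not touch: the selected symbol is turned into CPUFREQ_DEFAULT_GOVERNOR via an #if/#elif chain, so each new default entry needs a matching branch there. The fragment below sketches that pattern; the abyssplug branch is an assumption for illustration, not part of this patch.

/* Sketch of the CPUFREQ_DEFAULT_GOVERNOR selection in include/linux/cpufreq.h;
 * the abyssplug branch is illustrative only. */
#if defined(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE)
extern struct cpufreq_governor cpufreq_gov_performance;
#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_performance)
#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ABYSSPLUG)
extern struct cpufreq_governor cpufreq_gov_abyssplug;
#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_abyssplug)
#endif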
endchoice -config CPU_FREQ_GOV_PERFORMANCE - tristate "'performance' governor" +config CPU_FREQ_GOV_ABYSSPLUG + tristate "'abyssplug' cpufreq governor" + depends on CPU_FREQ && NO_HZ && HOTPLUG_CPU + ---help--- + 'abyssplug' - this driver mimics the frequency scaling behavior + in 'ondemand', but with several key differences. First is + that frequency transitions use the CPUFreq table directly, + instead of incrementing in a percentage of the maximum + available frequency. Second 'abyssplug' will offline auxillary + CPUs when the system is idle, and online those CPUs once the + system becomes busy again. This last feature is needed for + architectures which transition to low power states when only + the "master" CPU is online, or for thermally constrained + devices. + If you don't have one of these architectures or devices, use + 'ondemand' instead. + If in doubt, say N. + +config CPU_FREQ_GOV_ADAPTIVE + tristate "'adaptive' cpufreq policy governor" help - This cpufreq governor sets the frequency statically to the - highest available CPU frequency. + 'adaptive' - This driver adds a dynamic cpufreq policy governor + designed for latency-sensitive workloads and also for demanding + performance. + + This governor attempts to reduce the latency of clock + increases so that the system is more responsive to + interactive workloads in loweset steady-state but to + to reduce power consumption in middle operation level level up + will be done in step by step to prohibit system from going to + max operation level. To compile this driver as a module, choose M here: the - module will be called cpufreq_performance. + module will be called cpufreq_adaptive. - If in doubt, say Y. + For details, take a look at linux/Documentation/cpu-freq. -config CPU_FREQ_GOV_BADASS - tristate "'badass' cpufreq governor" + If in doubt, say N. + +config CPU_FREQ_GOV_ASSWAX + tristate "'asswax' cpufreq governor" depends on CPU_FREQ + help + Use as default governors + +config CPU_FREQ_GOV_BADASS + tristate "'badass' cpufreq policy governor" + select CPU_FREQ_TABLE help 'badass' - This driver adds a dynamic cpufreq policy governor. The governor does a periodic polling and changes frequency based on the CPU utilization. The support for this governor depends on CPU capability to - do fast frequency switching (i.e, very low latency frequency - transitions). - + do fast frequency switching (i.e, very low latency frequency transitions). + To compile this driver as a module, choose M here: the + module will be called cpufreq_badass. If in doubt, say N. -config CPU_FREQ_GOV_BADASS_2_PHASE - tristate "'2-phase' power-efficiency badass algorithm" - depends on CPU_FREQ_GOV_BADASS +config CPU_FREQ_GOV_CONSERVATIVE + tristate "'conservative' cpufreq governor" + depends on CPU_FREQ help - '2-phase' - This driver adds a new algo to save power + 'conservative' - this driver is rather similar to the 'ondemand' + governor both in its source code and its purpose, the difference is + its optimisation for better suitability in a battery powered + environment. The frequency is gracefully increased and decreased + rather than jumping to 100% when speed is required. 
-config CPU_FREQ_GOV_BADASS_2_PHASE_FREQ - int "'2-phase' badass frequency" - default 918000 - depends on CPU_FREQ_GOV_BADASS - depends on CPU_FREQ_GOV_BADASS_2_PHASE + If you have a desktop machine then you should really be considering + the 'ondemand' governor instead, however if you are using a laptop, + PDA or even an AMD64 based computer (due to the unacceptable + step-by-step latency issues between the minimum and maximum frequency + transitions in the CPU) you will probably want to use this governor. -config CPU_FREQ_GOV_BADASS_3_PHASE - tristate "'3-phase' power-efficiency badass algorithm" - depends on CPU_FREQ_GOV_BADASS - depends on CPU_FREQ_GOV_BADASS_2_PHASE - help - '3-phase' - This driver adds a new algo to save power + To compile this driver as a module, choose M here: the + module will be called cpufreq_conservative. -config CPU_FREQ_GOV_BADASS_3_PHASE_FREQ - int "'3-phase' badass frequency" - default 1188000 - depends on CPU_FREQ_GOV_BADASS - depends on CPU_FREQ_GOV_BADASS_2_PHASE - depends on CPU_FREQ_GOV_BADASS_3_PHASE + For details, take a look at linux/Documentation/cpu-freq. -config CPU_FREQ_GOV_BADASS_GPU_CONTROL - tristate "'gpu_control' power-efficiency badass algorithm" - depends on CPU_FREQ_GOV_BADASS - depends on CPU_FREQ_GOV_BADASS_2_PHASE - help - 'gpu_control' - This driver adds a new algo to save power + If in doubt, say N. -config CPU_FREQ_GOV_BADASS_LOWBAT_POWERSAVE - tristate "'lowbat_powersave' power-efficiency badass algorithm" - depends on CPU_FREQ_GOV_BADASS - help - 'lowbat_powersave' - This driver adds a new algo to save power +config CPU_FREQ_GOV_DANCEDANCE + tristate "'dancedance' cpufreq governor" + depends on CPU_FREQ -config CPU_FREQ_GOV_BADASS_ALLOW_BYPASS - tristate "Allows bypassing phases" - depends on CPU_FREQ_GOV_BADASS - depends on CPU_FREQ_GOV_BADASS_2_PHASE +config CPU_FREQ_GOV_INTERACTIVE + tristate "'interactive' cpufreq policy governor" help - 'allow_bypass' - This driver adds a bypass to the phases - + 'interactive' - This driver adds a dynamic cpufreq policy governor + designed for latency-sensitive workloads. -config CPU_FREQ_GOV_POWERSAVE - tristate "'powersave' governor" - help - This cpufreq governor sets the frequency statically to the - lowest available CPU frequency. + This governor attempts to reduce the latency of clock + increases so that the system is more responsive to + interactive workloads. To compile this driver as a module, choose M here: the - module will be called cpufreq_powersave. + module will be called cpufreq_interactive. - If in doubt, say Y. + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + +config CPU_FREQ_GOV_INTELLIDEMAND + tristate "'intellidemand' cpufreq policy governor" + select CPU_FREQ_TABLE + help + 'intellidemand' - This driver adds a dynamic cpufreq policy governor. + The governor does a periodic polling and + changes frequency based on the CPU utilization. + The support for this governor depends on CPU capability to + do fast frequency switching (i.e, very low latency frequency + transitions). with browsing detection based on GPU loading + + To compile this driver as a module, choose M here: the + module will be called cpufreq_ondemand. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. config CPU_FREQ_GOV_LIONHEART tristate "lionheart" depends on CPU_FREQ help - Use the CPUFreq governor 'lionheart' as default. 
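Several of the governors added above (badass, intellidemand, dancedance, nightmare and friends) describe the same basic idea in their help text: sample how busy each CPU was over the last period and pick a frequency from that utilization. As a point of reference, the shared calculation reduces to the sketch below, which mirrors the dbs_check_cpu() logic in the abyssplug source later in this patch; the helper name and the return-0-on-bogus-sample choice are simplifications of mine.

/* Simplified per-CPU load calculation used by the polling governors.
 * Deltas are measured against values cached at the previous sample. */
static unsigned int sample_cpu_load(unsigned int wall_delta,
				    unsigned int idle_delta)
{
	if (!wall_delta || wall_delta < idle_delta)
		return 0;	/* unusable sample, treat as idle */
	/* load is the percentage of wall time not spent idle */
	return 100 * (wall_delta - idle_delta) / wall_delta;
}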
config CPU_FREQ_GOV_SMARTASSV2 tristate "smartassv2" depends on CPU_FREQ help - Use the CPUFreq governor 'smartassv2' as default. - -config CPU_FREQ_GOV_USERSPACE - tristate "'userspace' governor for userspace frequency scaling" +config CPU_FREQ_GOV_SAVAGEDZEN + tristate "savagedzen" + depends on CPU_FREQ help - Enable this cpufreq governor when you either want to set the - CPU frequency manually or when a userspace program shall - be able to set the CPU dynamically, like on LART - . - To compile this driver as a module, choose M here: the - module will be called cpufreq_userspace. +config CPU_FREQ_GOV_LULZACTIVE + tristate "lulzactive" + depends on CPU_FREQ + help - For details, take a look at . +config CPU_FREQ_GOV_MINMAX + tristate "minmax" + depends on CPU_FREQ + help - If in doubt, say Y. +config CPU_FREQ_GOV_NIGHTMARE + tristate "'nightmare' cpufreq governor" + depends on CPU_FREQ config CPU_FREQ_GOV_ONDEMAND tristate "'ondemand' cpufreq policy governor" @@ -273,56 +386,68 @@ config CPU_FREQ_GOV_ONDEMAND If in doubt, say N. -config CPU_FREQ_GOV_ONDEMAND_2_PHASE - tristate "'2-phase' power-efficiency ondemand algorithm" - depends on CPU_FREQ_GOV_ONDEMAND +config CPU_FREQ_GOV_PERFORMANCE + tristate "'performance' governor" help - '2-phase' - This driver adds a new algo to save power + This cpufreq governor sets the frequency statically to the + highest available CPU frequency. -config CPU_FREQ_GOV_INTERACTIVE - tristate "'interactive' cpufreq policy governor" - help - 'interactive' - This driver adds a dynamic cpufreq policy governor - designed for latency-sensitive workloads. + To compile this driver as a module, choose M here: the + module will be called cpufreq_performance. - This governor attempts to reduce the latency of clock - increases so that the system is more responsive to - interactive workloads. + If in doubt, say Y. + +config CPU_FREQ_GOV_PEGASUSQ + tristate "'pegasusq' cpufreq policy governor" + +config CPU_FREQ_GOV_POWERSAVE + tristate "'powersave' governor" + help + This cpufreq governor sets the frequency statically to the + lowest available CPU frequency. To compile this driver as a module, choose M here: the - module will be called cpufreq_interactive. + module will be called cpufreq_powersave. - For details, take a look at linux/Documentation/cpu-freq. + If in doubt, say Y. - If in doubt, say N. +config CPU_FREQ_GOV_SLP + tristate "'slp' cpufreq policy governor" -config CPU_FREQ_GOV_CONSERVATIVE - tristate "'conservative' cpufreq governor" +config CPU_FREQ_GOV_SMARTASSH3 + tristate "'smartassH3' cpufreq governor" depends on CPU_FREQ help - 'conservative' - this driver is rather similar to the 'ondemand' - governor both in its source code and its purpose, the difference is - its optimisation for better suitability in a battery powered - environment. The frequency is gracefully increased and decreased - rather than jumping to 100% when speed is required. + 'smartassH3' - a "smart" governor - If you have a desktop machine then you should really be considering - the 'ondemand' governor instead, however if you are using a laptop, - PDA or even an AMD64 based computer (due to the unacceptable - step-by-step latency issues between the minimum and maximum frequency - transitions in the CPU) you will probably want to use this governor. 
+config CPU_FREQ_GOV_USERSPACE + tristate "'userspace' governor for userspace frequency scaling" + help + Enable this cpufreq governor when you either want to set the + CPU frequency manually or when a userspace program shall + be able to set the CPU dynamically, like on LART + . To compile this driver as a module, choose M here: the - module will be called cpufreq_conservative. + module will be called cpufreq_userspace. - For details, take a look at linux/Documentation/cpu-freq. + For details, take a look at . - If in doubt, say N. + If in doubt, say Y. -config CPU_FREQ_GOV_INTELLIDEMAND - tristate "'intellidemand' cpufreq governor" - depends on CPU_FREQ +config CPU_FREQ_GOV_WHEATLEY + tristate "'wheatley' cpufreq governor" + depends on CPU_FREQ +config SEC_DVFS + bool "DVFS job" + default n + depends on CPU_FREQ + +config SEC_DVFS_BOOSTER + bool "DVFS input booster" + default y + depends on SEC_DVFS menu "x86 CPU frequency scaling drivers" depends on X86 @@ -341,3 +466,4 @@ endmenu endif endmenu + diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index cc2230ee..e82f56c4 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -4,21 +4,28 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o # CPUfreq governors -obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o -obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o -obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o -obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o -obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o -obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o -obj-$(CONFIG_CPU_FREQ_GOV_BADASS) += cpufreq_badass.o -obj-$(CONFIG_CPU_FREQ_GOV_LIONHEART) += cpufreq_lionheart.o +obj-$(CONFIG_CPU_FREQ_GOV_ABYSSPLUG) += cpufreq_abyssplug.o +obj-$(CONFIG_CPU_FREQ_GOV_ADAPTIVE) += cpufreq_adaptive.o +obj-$(CONFIG_CPU_FREQ_GOV_ASSWAX) += cpufreq_asswax.o +obj-$(CONFIG_CPU_FREQ_GOV_BADASS) += cpufreq_badass.o +obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o +obj-$(CONFIG_CPU_FREQ_GOV_DANCEDANCE) += cpufreq_dancedance.o obj-$(CONFIG_CPU_FREQ_GOV_INTELLIDEMAND) += cpufreq_intellidemand.o -obj-$(CONFIG_CPU_FREQ_GOV_SMARTASSV2) += cpufreq_smartassv2.o - - +obj-$(CONFIG_CPU_FREQ_GOV_NIGHTMARE) += cpufreq_nightmare.o +obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o +obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o +obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o +obj-$(CONFIG_CPU_FREQ_GOV_SMARTASSH3) += cpufreq_smartassH3.o +obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o +obj-$(CONFIG_CPU_FREQ_GOV_WHEATLEY) += cpufreq_wheatley.o +obj-$(CONFIG_CPU_FREQ_GOV_LIONHEART) += cpufreq_lionheart.o +obj-$(CONFIG_CPU_FREQ_GOV_SMARTASSV2) += cpufreq_smartassv2.o +obj-$(CONFIG_CPU_FREQ_GOV_SAVAGEDZEN) += cpufreq_savagedzen.o +obj-$(CONFIG_CPU_FREQ_GOV_LULZACTIVE) += cpufreq_lulzactive.o +obj-$(CONFIG_CPU_FREQ_GOV_MINMAX) += cpufreq_minmax.o # CPUfreq cross-arch helpers -obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o +obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o ################################################################################## # x86 drivers. 
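For orientation before the two new governor sources that follow: each obj-$(CONFIG_CPU_FREQ_GOV_*) line added to this Makefile corresponds to a translation unit with the same overall shape, registering a struct cpufreq_governor with the cpufreq core. A stripped-down skeleton is shown below; all names are placeholders, and the real abyssplug and adaptive implementations follow in full.

/* Placeholder skeleton of a cpufreq governor module of this kernel era. */
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/module.h>

static int example_governor(struct cpufreq_policy *policy, unsigned int event)
{
	/* handle CPUFREQ_GOV_START/STOP/LIMITS for this policy */
	return 0;
}

static struct cpufreq_governor cpufreq_gov_example = {
	.name		= "example",
	.governor	= example_governor,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_example_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_example);
}

static void __exit cpufreq_gov_example_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_example);
}

module_init(cpufreq_gov_example_init);
module_exit(cpufreq_gov_example_exit);
MODULE_LICENSE("GPL");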
diff --git a/drivers/cpufreq/cpufreq_abyssplug.c b/drivers/cpufreq/cpufreq_abyssplug.c new file mode 100644 index 00000000..37df4463 --- /dev/null +++ b/drivers/cpufreq/cpufreq_abyssplug.c @@ -0,0 +1,817 @@ +/* + * CPUFreq AbyssPlug governor + * + * + * Based on hotplug governor + * Copyright (C) 2010 Texas Instruments, Inc. + * Mike Turquette + * Santosh Shilimkar + * + * Based on ondemand governor + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi , + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* greater than 95% avg load across online CPUs increases frequency */ +#define DEFAULT_UP_FREQ_MIN_LOAD (95) + +/* Keep 10% of idle under the up threshold when decreasing the frequency */ +#define DEFAULT_FREQ_DOWN_DIFFERENTIAL (1) + +/* less than 40% avg load across online CPUs decreases frequency */ +#define DEFAULT_DOWN_FREQ_MAX_LOAD (40) + +/* default sampling period (uSec) is bogus; 10x ondemand's default for x86 */ +#define DEFAULT_SAMPLING_PERIOD (50000) + +/* default number of sampling periods to average before hotplug-in decision */ +#define DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS (5) + +/* default number of sampling periods to average before hotplug-out decision */ +#define DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS (20) + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); +//static int hotplug_boost(struct cpufreq_policy *policy); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ABYSSPLUG +static +#endif +struct cpufreq_governor cpufreq_gov_abyssplug = { + .name = "abyssplug", + .governor = cpufreq_governor_dbs, + .owner = THIS_MODULE, +}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct work_struct cpu_up_work; + struct work_struct cpu_down_work; + struct cpufreq_frequency_table *freq_table; + int cpu; + unsigned int boost_applied; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, hp_cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on + * different CPUs. It protects dbs_enable in governor start/stop. 
+ */ +static DEFINE_MUTEX(dbs_mutex); + +static struct workqueue_struct *khotplug_wq; + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int down_threshold; + unsigned int hotplug_in_sampling_periods; + unsigned int hotplug_out_sampling_periods; + unsigned int hotplug_load_index; + unsigned int *hotplug_load_history; + unsigned int ignore_nice; + unsigned int io_is_busy; + unsigned int boost_timeout; +} dbs_tuners_ins = { + .sampling_rate = DEFAULT_SAMPLING_PERIOD, + .up_threshold = DEFAULT_UP_FREQ_MIN_LOAD, + .down_differential = DEFAULT_FREQ_DOWN_DIFFERENTIAL, + .down_threshold = DEFAULT_DOWN_FREQ_MAX_LOAD, + .hotplug_in_sampling_periods = DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS, + .hotplug_out_sampling_periods = DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS, + .hotplug_load_index = 0, + .ignore_nice = 0, + .io_is_busy = 0, + .boost_timeout = 0, +}; + +/* + * A corner case exists when switching io_is_busy at run-time: comparing idle + * times from a non-io_is_busy period to an io_is_busy period (or vice-versa) + * will misrepresent the actual change in system idleness. We ignore this + * corner case: enabling io_is_busy might cause freq increase and disabling + * might cause freq decrease, which probably matches the original intent. + */ +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time; + u64 iowait_time; + + /* cpufreq-abyssplug always assumes CONFIG_NO_HZ */ + idle_time = get_cpu_idle_time_us(cpu, wall); + + /* add time spent doing I/O to idle time */ + if (dbs_tuners_ins.io_is_busy) { + iowait_time = get_cpu_iowait_time_us(cpu, wall); + /* cpufreq-abyssplug always assumes CONFIG_NO_HZ */ + if (iowait_time != -1ULL && idle_time >= iowait_time) + idle_time -= iowait_time; + } + + return idle_time; +} + +/************************** sysfs interface ************************/ + +/* XXX look at global sysfs macros in cpufreq.h, can those be used here? 
*/ + +/* cpufreq_abyssplug Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(up_threshold, up_threshold); +show_one(down_differential, down_differential); +show_one(down_threshold, down_threshold); +show_one(hotplug_in_sampling_periods, hotplug_in_sampling_periods); +show_one(hotplug_out_sampling_periods, hotplug_out_sampling_periods); +show_one(ignore_nice_load, ignore_nice); +show_one(io_is_busy, io_is_busy); +show_one(boost_timeout, boost_timeout); + +static ssize_t store_boost_timeout(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.boost_timeout = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.sampling_rate = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input <= dbs_tuners_ins.down_threshold) { + return -EINVAL; + } + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.up_threshold = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input >= dbs_tuners_ins.up_threshold) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.down_differential = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_down_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input >= dbs_tuners_ins.up_threshold) { + return -EINVAL; + } + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.down_threshold = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_hotplug_in_sampling_periods(struct kobject *a, + struct attribute *b, const char *buf, size_t count) +{ + unsigned int input; + unsigned int *temp; + unsigned int max_windows; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + /* already using this value, bail out */ + if (input == dbs_tuners_ins.hotplug_in_sampling_periods) + return count; + + mutex_lock(&dbs_mutex); + ret = count; + max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods, + dbs_tuners_ins.hotplug_out_sampling_periods); + + /* no need to resize array */ + if (input <= max_windows) { + dbs_tuners_ins.hotplug_in_sampling_periods = input; + goto out; + } + + /* resize array */ + temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL); + + if (!temp || IS_ERR(temp)) { + ret = -ENOMEM; + goto out; + } + + memcpy(temp, dbs_tuners_ins.hotplug_load_history, + (max_windows * sizeof(unsigned int))); + kfree(dbs_tuners_ins.hotplug_load_history); + + /* replace old buffer, old number of sampling periods & old index */ + 
dbs_tuners_ins.hotplug_load_history = temp; + dbs_tuners_ins.hotplug_in_sampling_periods = input; + dbs_tuners_ins.hotplug_load_index = max_windows; +out: + mutex_unlock(&dbs_mutex); + + return ret; +} + +static ssize_t store_hotplug_out_sampling_periods(struct kobject *a, + struct attribute *b, const char *buf, size_t count) +{ + unsigned int input; + unsigned int *temp; + unsigned int max_windows; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + /* already using this value, bail out */ + if (input == dbs_tuners_ins.hotplug_out_sampling_periods) + return count; + + mutex_lock(&dbs_mutex); + ret = count; + max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods, + dbs_tuners_ins.hotplug_out_sampling_periods); + + /* no need to resize array */ + if (input <= max_windows) { + dbs_tuners_ins.hotplug_out_sampling_periods = input; + goto out; + } + + /* resize array */ + temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL); + + if (!temp || IS_ERR(temp)) { + ret = -ENOMEM; + goto out; + } + + memcpy(temp, dbs_tuners_ins.hotplug_load_history, + (max_windows * sizeof(unsigned int))); + kfree(dbs_tuners_ins.hotplug_load_history); + + /* replace old buffer, old number of sampling periods & old index */ + dbs_tuners_ins.hotplug_load_history = temp; + dbs_tuners_ins.hotplug_out_sampling_periods = input; + dbs_tuners_ins.hotplug_load_index = max_windows; +out: + mutex_unlock(&dbs_mutex); + + return ret; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + mutex_lock(&dbs_mutex); + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + mutex_unlock(&dbs_mutex); + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(hp_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + } + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.io_is_busy = !!input; + mutex_unlock(&dbs_mutex); + + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(up_threshold); +define_one_global_rw(down_differential); +define_one_global_rw(down_threshold); +define_one_global_rw(hotplug_in_sampling_periods); +define_one_global_rw(hotplug_out_sampling_periods); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(io_is_busy); +define_one_global_rw(boost_timeout); + +static struct attribute *dbs_attributes[] = { + &sampling_rate.attr, + &up_threshold.attr, + &down_differential.attr, + &down_threshold.attr, + &hotplug_in_sampling_periods.attr, + &hotplug_out_sampling_periods.attr, + &ignore_nice_load.attr, + &io_is_busy.attr, + &boost_timeout.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "abyssplug", +}; + +/************************** sysfs end ************************/ + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ 
+ /* combined load of all enabled CPUs */ + unsigned int total_load = 0; + /* single largest CPU load percentage*/ + unsigned int max_load = 0; + /* largest CPU load in terms of frequency */ + unsigned int max_load_freq = 0; + /* average load across all enabled CPUs */ + unsigned int avg_load = 0; + /* average load across multiple sampling periods for hotplug events */ + unsigned int hotplug_in_avg_load = 0; + unsigned int hotplug_out_avg_load = 0; + /* number of sampling periods averaged for hotplug decisions */ + unsigned int periods; + + struct cpufreq_policy *policy; + unsigned int i, j; + + policy = this_dbs_info->cur_policy; + + /* + * cpu load accounting + * get highest load, total load and average load across all CPUs + */ + for_each_cpu(j, policy->cpus) { + unsigned int load; + unsigned int idle_time, wall_time; + cputime64_t cur_wall_time, cur_idle_time; + struct cpu_dbs_info_s *j_dbs_info; + + j_dbs_info = &per_cpu(hp_cpu_dbs_info, j); + + /* update both cur_idle_time and cur_wall_time */ + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + + /* how much wall time has passed since last iteration? */ + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + /* how much idle time has passed since last iteration? */ + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + /* load is the percentage of time not spent in idle */ + load = 100 * (wall_time - idle_time) / wall_time; + + /* keep track of combined load across all CPUs */ + total_load += load; + + /* keep track of highest single load across all CPUs */ + if (load > max_load) + max_load = load; + } + + /* use the max load in the OPP freq change policy */ + max_load_freq = max_load * policy->cur; + + /* calculate the average load across all related CPUs */ + avg_load = total_load / num_online_cpus(); + + mutex_lock(&dbs_mutex); + + /* + * hotplug load accounting + * average load over multiple sampling periods + */ + + /* how many sampling periods do we use for hotplug decisions? */ + periods = max(dbs_tuners_ins.hotplug_in_sampling_periods, + dbs_tuners_ins.hotplug_out_sampling_periods); + + /* store avg_load in the circular buffer */ + dbs_tuners_ins.hotplug_load_history[dbs_tuners_ins.hotplug_load_index] + = avg_load; + + /* compute average load across in & out sampling periods */ + for (i = 0, j = dbs_tuners_ins.hotplug_load_index; + i < periods; i++, j--) { + if (i < dbs_tuners_ins.hotplug_in_sampling_periods) + hotplug_in_avg_load += + dbs_tuners_ins.hotplug_load_history[j]; + if (i < dbs_tuners_ins.hotplug_out_sampling_periods) + hotplug_out_avg_load += + dbs_tuners_ins.hotplug_load_history[j]; + + if (j == 0) + j = periods; + } + + hotplug_in_avg_load = hotplug_in_avg_load / + dbs_tuners_ins.hotplug_in_sampling_periods; + + hotplug_out_avg_load = hotplug_out_avg_load / + dbs_tuners_ins.hotplug_out_sampling_periods; + + /* return to first element if we're at the circular buffer's end */ + if (++dbs_tuners_ins.hotplug_load_index == periods) + dbs_tuners_ins.hotplug_load_index = 0; + + /* check if auxiliary CPU is needed based on avg_load */ + if (avg_load > dbs_tuners_ins.up_threshold) { + /* should we enable auxillary CPUs? 
*/ + if (num_online_cpus() < 2 && hotplug_in_avg_load > + dbs_tuners_ins.up_threshold) { + queue_work_on(this_dbs_info->cpu, khotplug_wq, + &this_dbs_info->cpu_up_work); + goto out; + } + } + + /* check for frequency increase based on max_load */ + if (max_load > dbs_tuners_ins.up_threshold) { + /* increase to highest frequency supported */ + if (policy->cur < policy->max) + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + + goto out; + } + + /* check for frequency decrease */ + if (avg_load < dbs_tuners_ins.down_threshold) { + /* are we at the minimum frequency already? */ + if (policy->cur <= policy->min) { + /* should we disable auxillary CPUs? */ + if (num_online_cpus() > 1 && hotplug_out_avg_load < + dbs_tuners_ins.down_threshold) { + queue_work_on(this_dbs_info->cpu, khotplug_wq, + &this_dbs_info->cpu_down_work); + } + goto out; + } + } + + /* + * go down to the lowest frequency which can sustain the load by + * keeping 30% of idle in order to not cross the up_threshold + */ + if ((max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) && (policy->cur > policy->min)) { + unsigned int freq_next; + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + if (freq_next < policy->min) + freq_next = policy->min; + + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } +out: + mutex_unlock(&dbs_mutex); + return; +} + +static void __cpuinit do_cpu_up(struct work_struct *work) +{ + cpu_up(1); +} + +static void __cpuinit do_cpu_down(struct work_struct *work) +{ + cpu_down(1); +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int delay = 0; + + mutex_lock(&dbs_info->timer_mutex); + if (!dbs_info->boost_applied) { + dbs_check_cpu(dbs_info); + /* We want all related CPUs to do sampling nearly on same jiffy */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + } else { + delay = usecs_to_jiffies(dbs_tuners_ins.boost_timeout); + dbs_info->boost_applied = 0; + if (num_online_cpus() < 2) + queue_work_on(cpu, khotplug_wq, + &dbs_info->cpu_up_work); + } + queue_delayed_work_on(cpu, khotplug_wq, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all related CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; + + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + if (!dbs_info->boost_applied) + delay = usecs_to_jiffies(dbs_tuners_ins.boost_timeout); + queue_delayed_work_on(dbs_info->cpu, khotplug_wq, &dbs_info->work, + delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int i, j, max_periods; + int rc; + + this_dbs_info = &per_cpu(hp_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(hp_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = 
get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + + max_periods = max(DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS, + DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS); + dbs_tuners_ins.hotplug_load_history = kmalloc( + (sizeof(unsigned int) * max_periods), + GFP_KERNEL); + if (!dbs_tuners_ins.hotplug_load_history) { + WARN_ON(1); + return -ENOMEM; + } + for (i = 0; i < max_periods; i++) + dbs_tuners_ins.hotplug_load_history[i] = 50; + } + this_dbs_info->cpu = cpu; + this_dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + } + if (!dbs_tuners_ins.boost_timeout) + dbs_tuners_ins.boost_timeout = dbs_tuners_ins.sampling_rate * 30; + mutex_unlock(&dbs_mutex); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + break; + + case CPUFREQ_GOV_STOP: + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + dbs_enable--; + mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + kfree(dbs_tuners_ins.hotplug_load_history); + /* + * XXX BIG CAVEAT: Stopping the governor with CPU1 offline + * will result in it remaining offline until the user onlines + * it again. It is up to the user to do this (for now). + */ + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +#if 0 +static int hotplug_boost(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + + this_dbs_info = &per_cpu(hp_cpu_dbs_info, cpu); + +#if 0 + /* Already at max? 
*/ + if (policy->cur == policy->max) + return; +#endif + + mutex_lock(&this_dbs_info->timer_mutex); + this_dbs_info->boost_applied = 1; + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + mutex_unlock(&this_dbs_info->timer_mutex); + + return 0; +} +#endif + +static int __init cpufreq_gov_dbs_init(void) +{ + int err; + cputime64_t wall; + u64 idle_time; + int cpu = get_cpu(); + struct cpu_dbs_info_s *dbs_info = &per_cpu(hp_cpu_dbs_info, 0); + + INIT_WORK(&dbs_info->cpu_up_work, do_cpu_up); + INIT_WORK(&dbs_info->cpu_down_work, do_cpu_down); + + idle_time = get_cpu_idle_time_us(cpu, &wall); + put_cpu(); + if (idle_time != -1ULL) { + dbs_tuners_ins.up_threshold = DEFAULT_UP_FREQ_MIN_LOAD; + } else { + pr_err("cpufreq-abyssplug: %s: assumes CONFIG_NO_HZ\n", + __func__); + return -EINVAL; + } + + khotplug_wq = create_workqueue("khotplug"); + if (!khotplug_wq) { + pr_err("Creation of khotplug failed\n"); + return -EFAULT; + } + err = cpufreq_register_governor(&cpufreq_gov_abyssplug); + if (err) + destroy_workqueue(khotplug_wq); + + return err; +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_abyssplug); + destroy_workqueue(khotplug_wq); +} + +MODULE_DESCRIPTION("'cpufreq_abyssplug' - cpufreq governor for dynamic frequency scaling and CPU hotplug"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ABYSSPLUG +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); + diff --git a/drivers/cpufreq/cpufreq_adaptive.c b/drivers/cpufreq/cpufreq_adaptive.c new file mode 100644 index 00000000..2eff3e28 --- /dev/null +++ b/drivers/cpufreq/cpufreq_adaptive.c @@ -0,0 +1,952 @@ +/* + * drivers/cpufreq/cpufreq_adaptive.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define MIN_ONDEMAND_THRESHOLD (4) +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. + * All times here are in uS. 
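+ *
+ * Worked example (illustrative figures, not from the original comment):
+ * a driver reporting a 500 uS transition latency gets a default sampling
+ * period of about 500 * 1000 = 500,000 uS, i.e. the load is re-evaluated
+ * roughly twice per second, while anything slower than 10 mS (including
+ * CPUFREQ_ETERNAL drivers) is refused through max_transition_latency.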
+ */ +#define MIN_SAMPLING_RATE_RATIO (2) + +static unsigned int min_sampling_rate; + +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void (*pm_idle_old)(void); +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ADAPTIVE +static +#endif +struct cpufreq_governor cpufreq_gov_adaptive = { + .name = "adaptive", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_hi_jiffies; + int cpu; + unsigned int sample_type:1; + bool ondemand; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on + * different CPUs. It protects dbs_enable in governor start/stop. + */ +static DEFINE_MUTEX(dbs_mutex); +static struct task_struct *up_task; +static struct workqueue_struct *down_wq; +static struct work_struct freq_scale_down_work; +static cpumask_t up_cpumask; +static spinlock_t up_cpumask_lock; +static cpumask_t down_cpumask; +static spinlock_t down_cpumask_lock; + +static DEFINE_PER_CPU(cputime64_t, idle_in_idle); +static DEFINE_PER_CPU(cputime64_t, idle_exit_wall); + +static struct timer_list cpu_timer; +static unsigned int target_freq; +static DEFINE_MUTEX(short_timer_mutex); + +/* Go to max speed when CPU load at or above this value. 
*/ +#define DEFAULT_GO_MAXSPEED_LOAD 60 +static unsigned long go_maxspeed_load; + +#define DEFAULT_KEEP_MINSPEED_LOAD 30 +static unsigned long keep_minspeed_load; + +#define DEFAULT_STEPUP_LOAD 10 +static unsigned long step_up_load; + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int io_is_busy; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, +}; + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +static void adaptive_init_cpu(int cpu) +{ + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_max(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + printk_once(KERN_INFO "CPUFREQ: adaptive sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", current->comm); + return sprintf(buf, "%u\n", -1U); +} + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_max); +define_one_global_ro(sampling_rate_min); + +/* cpufreq_adaptive Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(ignore_nice_load, ignore_nice); + +/*** delete after deprecation time ***/ + +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs " \ + "interface is deprecated - " #file_name "\n"); + +#define show_one_old(file_name) \ +static ssize_t show_##file_name##_old \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return show_##file_name(NULL, NULL, buf); \ +} + +/*** delete after deprecation time ***/ + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.io_is_busy = !!input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.up_threshold = input; + mutex_unlock(&dbs_mutex); + + return 
count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + mutex_lock(&dbs_mutex); + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + mutex_unlock(&dbs_mutex); + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time_us(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + } + mutex_unlock(&dbs_mutex); + + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(ignore_nice_load); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_max.attr, + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &ignore_nice_load.attr, + &io_is_busy.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "adaptive", +}; + +/*** delete after deprecation time ***/ + +#define write_one_old(file_name) \ +static ssize_t store_##file_name##_old \ +(struct cpufreq_policy *unused, const char *buf, size_t count) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return store_##file_name(NULL, NULL, buf, count); \ +} + +static void cpufreq_adaptive_timer(unsigned long data) +{ + cputime64_t cur_idle; + cputime64_t cur_wall; + unsigned int delta_idle; + unsigned int delta_time; + int short_load; + unsigned int new_freq; + unsigned long flags; + struct cpu_dbs_info_s *this_dbs_info; + struct cpufreq_policy *policy; + unsigned int j; + unsigned int index; + unsigned int max_load = 0; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, 0); + + policy = this_dbs_info->cur_policy; + + for_each_online_cpu(j) { + cur_idle = get_cpu_idle_time_us(j, &cur_wall); + + delta_idle = (unsigned int) cputime64_sub(cur_idle, + per_cpu(idle_in_idle, j)); + delta_time = (unsigned int) cputime64_sub(cur_wall, + per_cpu(idle_exit_wall, j)); + + /* + * If timer ran less than 1ms after short-term sample started, retry. 
+ */ + if (delta_time < 1000) + goto do_nothing; + + if (delta_idle > delta_time) + short_load = 0; + else + short_load = 100 * (delta_time - delta_idle) / delta_time; + + if (short_load > max_load) + max_load = short_load; + } + + if (this_dbs_info->ondemand) + goto do_nothing; + + if (max_load >= go_maxspeed_load) + new_freq = policy->max; + else + new_freq = policy->max * max_load / 100; + + if ((max_load <= keep_minspeed_load) && + (policy->cur == policy->min)) + new_freq = policy->cur; + + if (cpufreq_frequency_table_target(policy, this_dbs_info->freq_table, + new_freq, CPUFREQ_RELATION_L, + &index)) { + goto do_nothing; + } + + new_freq = this_dbs_info->freq_table[index].frequency; + + target_freq = new_freq; + + if (new_freq < this_dbs_info->cur_policy->cur) { + spin_lock_irqsave(&down_cpumask_lock, flags); + cpumask_set_cpu(0, &down_cpumask); + spin_unlock_irqrestore(&down_cpumask_lock, flags); + queue_work(down_wq, &freq_scale_down_work); + } else { + spin_lock_irqsave(&up_cpumask_lock, flags); + cpumask_set_cpu(0, &up_cpumask); + spin_unlock_irqrestore(&up_cpumask_lock, flags); + wake_up_process(up_task); + } + + return; + +do_nothing: + for_each_online_cpu(j) { + per_cpu(idle_in_idle, j) = + get_cpu_idle_time_us(j, + &per_cpu(idle_exit_wall, j)); + } + mod_timer(&cpu_timer, jiffies + 2); + schedule_delayed_work_on(0, &this_dbs_info->work, 10); + + if (mutex_is_locked(&short_timer_mutex)) + mutex_unlock(&short_timer_mutex); + return; +} + +/*** delete after deprecation time ***/ + +/************************** sysfs end ************************/ + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ +#ifndef CONFIG_ARCH_EXYNOS4 + if (p->cur == p->max) + return; +#endif + __cpufreq_driver_target(p, freq, CPUFREQ_RELATION_H); +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + + struct cpufreq_policy *policy; + unsigned int j; + + unsigned int index, new_freq; + unsigned int longterm_load = 0; + + policy = this_dbs_info->cur_policy; + + /* + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over + * 30%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. 
+ * Frequency reduction happens at minimum steps of + * 5% (default) of current frequency + */ + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time_us(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + j_dbs_info->prev_cpu_iowait); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_dbs_info->prev_cpu_nice; + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + /* + * For the purpose of adaptive, waiting for disk IO is an + * indication that you're performance critical, and not that + * the system is actually idle. So subtract the iowait time + * from the cpu idle time. + */ + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + if (load > longterm_load) + longterm_load = load; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + + if (load_freq > max_load_freq) + max_load_freq = load_freq; + } + + if (longterm_load >= MIN_ONDEMAND_THRESHOLD) + this_dbs_info->ondemand = true; + else + this_dbs_info->ondemand = false; + + /* Check for frequency increase */ + if (max_load_freq > (dbs_tuners_ins.up_threshold * policy->cur)) { + cpufreq_frequency_table_target(policy, + this_dbs_info->freq_table, + (policy->cur + step_up_load), + CPUFREQ_RELATION_L, &index); + + new_freq = this_dbs_info->freq_table[index].frequency; + dbs_freq_increase(policy, new_freq); + return; + } + + /* Check for frequency decrease */ + /* if we cannot reduce the frequency anymore, break out early */ +#ifndef CONFIG_ARCH_EXYNOS4 + if (policy->cur == policy->min) + return; +#endif + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. 
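+	 * (Illustration, assuming up_threshold = 80 and down_differential = 10:
+	 *  a max_load_freq of 35,000,000, i.e. roughly 50% load at 700,000 kHz,
+	 *  is below 70 * policy->cur, so freq_next = 35,000,000 / 70 = 500,000 kHz,
+	 *  clamped to policy->min if that is higher.)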
+	 */
+	if (max_load_freq <
+	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
+	     policy->cur) {
+		unsigned int freq_next;
+		freq_next = max_load_freq /
+				(dbs_tuners_ins.up_threshold -
+				 dbs_tuners_ins.down_differential);
+
+		if (freq_next < policy->min)
+			freq_next = policy->min;
+
+		__cpufreq_driver_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+	}
+}
+
+static void do_dbs_timer(struct work_struct *work)
+{
+	struct cpu_dbs_info_s *dbs_info =
+		container_of(work, struct cpu_dbs_info_s, work.work);
+	unsigned int cpu = dbs_info->cpu;
+
+	int delay;
+
+	mutex_lock(&dbs_info->timer_mutex);
+
+	/* Common NORMAL_SAMPLE setup */
+	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
+	dbs_check_cpu(dbs_info);
+
+	/* We want all CPUs to do sampling nearly on
+	 * same jiffy
+	 */
+	delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
+
+	mutex_unlock(&dbs_info->timer_mutex);
+}
+
+static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+	/* We want all CPUs to do sampling nearly on same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
+	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
+}
+
+static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+	cancel_delayed_work_sync(&dbs_info->work);
+}
+
+/*
+ * Not all CPUs want IO time to be accounted as busy; this depends on how
+ * efficient idling at a higher frequency/voltage is.
+ * Pavel Machek says this is not so for various generations of AMD and old
+ * Intel systems.
+ * Mike Chan (android.com) claims this is also not true for ARM.
+ * Because of this, whitelist specific known (series) of CPUs by default, and
+ * leave all others up to the user.
+ */
+static int should_io_be_busy(void)
+{
+#if defined(CONFIG_X86)
+	/*
+	 * For Intel, Core 2 (model 15) and later have an efficient idle.
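+	 * (The check below returns 1 for family 6, model 15, the first Core 2
+	 *  parts, and anything newer.)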
+ */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return 0; +} + +static void cpufreq_adaptive_idle(void) +{ + int i; + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 0); + struct cpufreq_policy *policy; + + policy = dbs_info->cur_policy; + + pm_idle_old(); + + if ((policy->cur == policy->min) || + (policy->cur == policy->max)) { + + if (timer_pending(&cpu_timer)) + return; + + if (mutex_trylock(&short_timer_mutex)) { + for_each_online_cpu(i) { + per_cpu(idle_in_idle, i) = + get_cpu_idle_time_us(i, + &per_cpu(idle_exit_wall, i)); + } + + mod_timer(&cpu_timer, jiffies + 2); + cancel_delayed_work(&dbs_info->work); + } + } else { + if (timer_pending(&cpu_timer)) + del_timer(&cpu_timer); + + } +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_mutex); + + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time_us(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + } + this_dbs_info->cpu = cpu; + adaptive_init_cpu(cpu); + + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); + dbs_tuners_ins.io_is_busy = should_io_be_busy(); + } + mutex_unlock(&dbs_mutex); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + + pm_idle_old = pm_idle; + pm_idle = cpufreq_adaptive_idle; + break; + + case CPUFREQ_GOV_STOP: + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + sysfs_remove_group(&policy->kobj, &dbs_attr_group); + mutex_destroy(&this_dbs_info->timer_mutex); + dbs_enable--; + mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + pm_idle = pm_idle_old; + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static inline void cpufreq_adaptive_update_time(void) +{ + struct cpu_dbs_info_s *this_dbs_info; + struct cpufreq_policy *policy; + int j; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, 0); + policy = this_dbs_info->cur_policy; + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time_us(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + j_dbs_info->prev_cpu_wall = cur_wall_time; + + j_dbs_info->prev_cpu_idle = cur_idle_time; + + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) + j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + } + +} + +static int cpufreq_adaptive_up_task(void *data) +{ + unsigned long flags; + struct cpu_dbs_info_s *this_dbs_info; + struct cpufreq_policy *policy; + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + this_dbs_info = &per_cpu(od_cpu_dbs_info, 0); + policy = this_dbs_info->cur_policy; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&up_cpumask_lock, flags); + + if (cpumask_empty(&up_cpumask)) { + spin_unlock_irqrestore(&up_cpumask_lock, flags); + schedule(); + + if (kthread_should_stop()) + break; + + spin_lock_irqsave(&up_cpumask_lock, flags); + } + + set_current_state(TASK_RUNNING); + + cpumask_clear(&up_cpumask); + spin_unlock_irqrestore(&up_cpumask_lock, flags); + + __cpufreq_driver_target(this_dbs_info->cur_policy, + target_freq, + CPUFREQ_RELATION_H); + if (policy->cur != policy->max) { + mutex_lock(&this_dbs_info->timer_mutex); + + schedule_delayed_work_on(0, &this_dbs_info->work, delay); + mutex_unlock(&this_dbs_info->timer_mutex); + cpufreq_adaptive_update_time(); + } + if (mutex_is_locked(&short_timer_mutex)) + mutex_unlock(&short_timer_mutex); + } + + return 0; +} + +static void cpufreq_adaptive_freq_down(struct work_struct *work) +{ + unsigned long flags; + struct cpu_dbs_info_s *this_dbs_info; + struct cpufreq_policy *policy; + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + spin_lock_irqsave(&down_cpumask_lock, 
flags); + cpumask_clear(&down_cpumask); + spin_unlock_irqrestore(&down_cpumask_lock, flags); + + this_dbs_info = &per_cpu(od_cpu_dbs_info, 0); + policy = this_dbs_info->cur_policy; + + __cpufreq_driver_target(this_dbs_info->cur_policy, + target_freq, + CPUFREQ_RELATION_H); + + if (policy->cur != policy->min) { + mutex_lock(&this_dbs_info->timer_mutex); + + schedule_delayed_work_on(0, &this_dbs_info->work, delay); + mutex_unlock(&this_dbs_info->timer_mutex); + cpufreq_adaptive_update_time(); + } + + if (mutex_is_locked(&short_timer_mutex)) + mutex_unlock(&short_timer_mutex); +} + +static int __init cpufreq_gov_dbs_init(void) +{ + cputime64_t wall; + u64 idle_time; + int cpu = get_cpu(); + + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD; + keep_minspeed_load = DEFAULT_KEEP_MINSPEED_LOAD; + step_up_load = DEFAULT_STEPUP_LOAD; + + idle_time = get_cpu_idle_time_us(cpu, &wall); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_tuners_ins.down_differential = + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In no_hz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + } + + init_timer(&cpu_timer); + cpu_timer.function = cpufreq_adaptive_timer; + + up_task = kthread_create(cpufreq_adaptive_up_task, NULL, + "kadaptiveup"); + + if (IS_ERR(up_task)) + return PTR_ERR(up_task); + + sched_setscheduler_nocheck(up_task, SCHED_FIFO, ¶m); + get_task_struct(up_task); + + /* No rescuer thread, bind to CPU queuing the work for possibly + warm cache (probably doesn't matter much). */ + down_wq = alloc_workqueue("kadaptive_down", 0, 1); + + if (!down_wq) + goto err_freeuptask; + + INIT_WORK(&freq_scale_down_work, cpufreq_adaptive_freq_down); + + + return cpufreq_register_governor(&cpufreq_gov_adaptive); +err_freeuptask: + put_task_struct(up_task); + return -ENOMEM; +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_adaptive); +} + + +MODULE_AUTHOR("Venkatesh Pallipadi "); +MODULE_AUTHOR("Alexey Starikovskiy "); +MODULE_DESCRIPTION("'cpufreq_adaptive' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ADAPTIVE +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_asswax.c b/drivers/cpufreq/cpufreq_asswax.c new file mode 100644 index 00000000..cd2d9333 --- /dev/null +++ b/drivers/cpufreq/cpufreq_asswax.c @@ -0,0 +1,945 @@ +/* + * drivers/cpufreq/cpufreq_asswax.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * Rewritten by: Godmachine81 + * Worked on by: Zarboz + * Original Author: Erasmux + * + * A work in progress of merging BrazilianWax and Smartass into AssWAX! + * + * Originally Based on the interactive governor By Mike Chan (mike@android.com) + * which was adaptated to 2.6.29 kernel by Nadlabak (pavel@doshaska.net) + * + * SMP support based on mod by faux123 + * + * For a general overview of asswax see the relavent part in + * Documentation/cpu-freq/governors.txt + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/******************** Tunable parameters: ********************/ + +/* + * The "ideal" frequency to use when awake. The governor will ramp up faster + * towards the ideal frequency and slower after it has passed it. Similarly, + * lowering the frequency towards the ideal frequency is faster than below it. + */ +static unsigned int awake_ideal_freq = 594000; + +static unsigned int interactive_ideal_freq = 810000; + +static unsigned int interactive_timeout = 2; + +/* + * The "ideal" frequency to use when suspended. + * When set to 0, the governor will not track the suspended state (meaning + * that practically when sleep_ideal_freq==0 the awake_ideal_freq is used + * also when suspended). + */ +static unsigned int sleep_ideal_freq = 384000; + +/* + * Freqeuncy delta when ramping up above the ideal freqeuncy. + * Zero disables and causes to always jump straight to max frequency. + * When below the ideal freqeuncy we always ramp up to the ideal freq. + */ +static unsigned int ramp_up_step = 192000; + +/* + * Freqeuncy delta when ramping down below the ideal freqeuncy. + * Zero disables and will calculate ramp down according to load heuristic. + * When above the ideal freqeuncy we always ramp down to the ideal freq. + */ +static unsigned int ramp_down_step = 0; + +/* + * CPU freq will be increased if measured load > max_cpu_load; + */ +static unsigned long max_cpu_load = 85; + +/* + * CPU freq will be decreased if measured load < min_cpu_load; + */ +static unsigned long min_cpu_load = 45; + +/* + * The minimum amount of time to spend at a frequency before we can ramp up. + * Notice we ignore this when we are below the ideal frequency. + */ +static unsigned long up_rate_us = 10000; + +/* + * The minimum amount of time to spend at a frequency before we can ramp down. + * Notice we ignore this when we are above the ideal frequency. + */ +static unsigned long down_rate_us = 20000; + +/* + * The frequency to set when waking up from sleep. + * When sleep_ideal_freq=0 this will have no effect. + */ +static unsigned int sleep_wakeup_freq = 151200; // typo? -dm + +/* + * Sampling rate, I highly recommend to leave it at 2. 
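+ * (This is in jiffies, so the real sampling period depends on HZ: a value
+ *  of 2 is 20 ms at HZ=100 or 8 ms at HZ=250, for example.)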
+ */ +static unsigned int sample_rate_jiffies = 2; + +/*************** End of tunables ***************/ + +static atomic_t active_count = ATOMIC_INIT(0); + +struct asswax_info_s { + struct cpufreq_policy *cur_policy; + struct cpufreq_frequency_table *freq_table; + struct timer_list timer; + u64 time_in_idle; + u64 idle_exit_time; + u64 freq_change_time; + u64 freq_change_time_in_idle; + int cur_cpu_load; + int old_freq; + int ramp_dir; + unsigned int enable; + int ideal_speed; +}; +static DEFINE_PER_CPU(struct asswax_info_s, asswax_info); + +/* Workqueues handle frequency scaling */ +static struct workqueue_struct *up_wq; +static struct workqueue_struct *down_wq; +static struct work_struct freq_scale_work; + +static cpumask_t work_cpumask; +static spinlock_t cpumask_lock; + +static unsigned int asswax_state = 1; // 0 = suspend, 1 = awake, 2 = interactive, 3 = touched + +//#define DEBUG +#ifndef DEBUG +#define dprintk(x...) do { } while (0) +#else +#define dprintk(flag,msg...) do { \ + if (debug_mask & flag) printk(KERN_DEBUG msg); \ + } while (0) + +enum { + ASSWAX_DEBUG_JUMPS=1, + ASSWAX_DEBUG_LOAD=2, + ASSWAX_DEBUG_ALG=4 +}; + +/* + * Combination of the above debug flags. + */ +static unsigned long debug_mask = 7; +#endif + +static int cpufreq_governor_asswax(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ASSWAX +static +#endif +struct cpufreq_governor cpufreq_gov_asswax = { + .name = "asswax", + .governor = cpufreq_governor_asswax, + .max_transition_latency = 9000000, + .owner = THIS_MODULE, +}; + +static void asswax_update_min_max(struct asswax_info_s *this_asswax, struct cpufreq_policy *policy, int state) { + int tmp = 0; + dprintk(ASSWAX_DEBUG_ALG, "asswax entering state %i on cpu %u\n", state, policy->cpu); + switch (state) { + case 0: + tmp = sleep_ideal_freq; + break; + case 1: + tmp = awake_ideal_freq; + break; + case 2: + case 3: + tmp = interactive_ideal_freq; + break; + } + this_asswax->ideal_speed = + policy->max > tmp ? (tmp > policy->min ? 
tmp : policy->min) : policy->max; +} + +static void asswax_update_min_max_allcpus(void) { + unsigned int i; + for_each_online_cpu(i) { + struct asswax_info_s *this_asswax = &per_cpu(asswax_info, i); + if (this_asswax->enable) + asswax_update_min_max(this_asswax,this_asswax->cur_policy,asswax_state); + } +} + +inline static unsigned int validate_freq(struct cpufreq_policy *policy, int freq) { + if (freq > (int)policy->max) + return policy->max; + if (freq < (int)policy->min) + return policy->min; + return freq; +} + +inline static void reset_timer(unsigned long cpu, struct asswax_info_s *this_asswax) { + this_asswax->time_in_idle = get_cpu_idle_time_us(cpu, &this_asswax->idle_exit_time); + mod_timer(&this_asswax->timer, jiffies + sample_rate_jiffies); +} + +inline static void work_cpumask_set(unsigned long cpu) { + unsigned long flags; + spin_lock_irqsave(&cpumask_lock, flags); + cpumask_set_cpu(cpu, &work_cpumask); + spin_unlock_irqrestore(&cpumask_lock, flags); +} + +inline static int work_cpumask_test_and_clear(unsigned long cpu) { + unsigned long flags; + int res = 0; + spin_lock_irqsave(&cpumask_lock, flags); + res = cpumask_test_and_clear_cpu(cpu, &work_cpumask); + spin_unlock_irqrestore(&cpumask_lock, flags); + return res; +} + +static void do_disable_interaction(unsigned long data) { + asswax_state = 1; + asswax_update_min_max_allcpus(); +} +static DEFINE_TIMER(interaction_timer, do_disable_interaction, 0, 0); +static inline void begin_interaction_timeout(void) { + mod_timer(&interaction_timer, jiffies + interactive_timeout); +} +static inline void end_interaction_timeout(void) { + if (timer_pending(&interaction_timer)) + del_timer(&interaction_timer); +} + + +inline static int target_freq(struct cpufreq_policy *policy, struct asswax_info_s *this_asswax, + int new_freq, int old_freq, int prefered_relation) { + int index, target; + struct cpufreq_frequency_table *table = this_asswax->freq_table; + + if (new_freq == old_freq) + return 0; + new_freq = validate_freq(policy,new_freq); + if (new_freq == old_freq) + return 0; + + if (table && + !cpufreq_frequency_table_target(policy,table,new_freq,prefered_relation,&index)) + { + target = table[index].frequency; + if (target == old_freq) { + // if for example we are ramping up to *at most* current + ramp_up_step + // but there is no such frequency higher than the current, try also + // to ramp up to *at least* current + ramp_up_step. + if (new_freq > old_freq && prefered_relation==CPUFREQ_RELATION_H + && !cpufreq_frequency_table_target(policy,table,new_freq, + CPUFREQ_RELATION_L,&index)) + target = table[index].frequency; + // simlarly for ramping down: + else if (new_freq < old_freq && prefered_relation==CPUFREQ_RELATION_L + && !cpufreq_frequency_table_target(policy,table,new_freq, + CPUFREQ_RELATION_H,&index)) + target = table[index].frequency; + } + + if (target == old_freq) { + // We should not get here: + // If we got here we tried to change to a validated new_freq which is different + // from old_freq, so there is no reason for us to remain at same frequency. 
+ printk(KERN_WARNING "Smartass: frequency change failed: %d to %d => %d\n", + old_freq,new_freq,target); + return 0; + } + } + else target = new_freq; + + __cpufreq_driver_target(policy, target, prefered_relation); + + dprintk(ASSWAX_DEBUG_JUMPS,"SmartassQ: jumping from %d to %d => %d (%d)\n", + old_freq,new_freq,target,policy->cur); + + return target; +} + +static void cpufreq_asswax_timer(unsigned long cpu) +{ + u64 delta_idle; + u64 delta_time; + int cpu_load; + int old_freq; + u64 update_time; + u64 now_idle; + int queued_work = 0; + struct asswax_info_s *this_asswax = &per_cpu(asswax_info, cpu); + struct cpufreq_policy *policy = this_asswax->cur_policy; + + now_idle = get_cpu_idle_time_us(cpu, &update_time); + old_freq = policy->cur; + + if (this_asswax->idle_exit_time == 0 || update_time == this_asswax->idle_exit_time) + return; + + delta_idle = (now_idle - this_asswax->time_in_idle); + delta_time = (update_time - this_asswax->idle_exit_time); + + // If timer ran less than 1ms after short-term sample started, retry. + if (delta_time < 1000) { + if (!timer_pending(&this_asswax->timer)) + reset_timer(cpu,this_asswax); + return; + } + + if (delta_idle > delta_time) + cpu_load = 0; + else + cpu_load = 100 * (unsigned int)(delta_time - delta_idle) / (unsigned int)delta_time; + + dprintk(ASSWAX_DEBUG_LOAD,"asswaxT @ %d: load %d (delta_time %llu)\n", + old_freq,cpu_load,delta_time); + + this_asswax->cur_cpu_load = cpu_load; + this_asswax->old_freq = old_freq; + + // Scale up if load is above max or if there where no idle cycles since coming out of idle, + // additionally, if we are at or above the ideal_speed, verify we have been at this frequency + // for at least up_rate_us: + if (cpu_load > max_cpu_load || delta_idle == 0) + { + if (old_freq < policy->max && + (old_freq < this_asswax->ideal_speed || delta_idle == 0 || + (update_time - this_asswax->freq_change_time) >= up_rate_us)) + { + dprintk(ASSWAX_DEBUG_ALG,"asswaxT @ %d ramp up: load %d (delta_idle %llu)\n", + old_freq,cpu_load,delta_idle); + this_asswax->ramp_dir = 1; + work_cpumask_set(cpu); + queue_work(up_wq, &freq_scale_work); + queued_work = 1; + if (asswax_state == 2 && old_freq == this_asswax->ideal_speed) + end_interaction_timeout(); + } + else this_asswax->ramp_dir = 0; + } + // Similarly for scale down: load should be below min and if we are at or below ideal + // frequency we require that we have been at this frequency for at least down_rate_us: + else if (cpu_load < min_cpu_load && old_freq > policy->min && + (old_freq > this_asswax->ideal_speed || + (update_time - this_asswax->freq_change_time) >= down_rate_us)) + { + dprintk(ASSWAX_DEBUG_ALG,"asswaxT @ %d ramp down: load %d (delta_idle %llu)\n", + old_freq,cpu_load,delta_idle); + this_asswax->ramp_dir = -1; + work_cpumask_set(cpu); + queue_work(down_wq, &freq_scale_work); + queued_work = 1; + } + else this_asswax->ramp_dir = 0; + + // To avoid unnecessary load when the CPU is already at high load, we don't + // reset ourselves if we are at max speed. If and when there are idle cycles, + // the idle loop will activate the timer. + // Additionally, if we queued some work, the work task will reset the timer + // after it has done its adjustments. 
+ if (!queued_work && old_freq < policy->max) + reset_timer(cpu,this_asswax); +} + +static int cpufreq_idle_notifier(struct notifier_block *nb, + unsigned long val, void *data) { + struct asswax_info_s *this_asswax = &per_cpu(asswax_info, smp_processor_id()); + struct cpufreq_policy *policy = this_asswax->cur_policy; + + if (!this_asswax->enable) + return NOTIFY_DONE; + + if (val == IDLE_START) { + if (policy->cur == policy->max && !timer_pending(&this_asswax->timer)) { + reset_timer(smp_processor_id(), this_asswax); + } else if (policy->cur == policy->min) { + if (timer_pending(&this_asswax->timer)) + del_timer(&this_asswax->timer); + else if (asswax_state == 2) + begin_interaction_timeout(); + } + } else if (val == IDLE_END) { + if (policy->cur == policy->min && !timer_pending(&this_asswax->timer)) + reset_timer(smp_processor_id(), this_asswax); + } + + return NOTIFY_OK; +} +static struct notifier_block cpufreq_idle_nb = { + .notifier_call = cpufreq_idle_notifier, +}; + +/* We use the same work function to sale up and down */ +static void cpufreq_asswax_freq_change_time_work(struct work_struct *work) +{ + unsigned int cpu; + int new_freq; + int old_freq; + int ramp_dir; + struct asswax_info_s *this_asswax; + struct cpufreq_policy *policy; + unsigned int relation = CPUFREQ_RELATION_L; + for_each_possible_cpu(cpu) { + if (!work_cpumask_test_and_clear(cpu)) + continue; + this_asswax = &per_cpu(asswax_info, cpu); + + ramp_dir = this_asswax->ramp_dir; + this_asswax->ramp_dir = 0; + + old_freq = this_asswax->old_freq; + policy = this_asswax->cur_policy; + + if (old_freq != policy->cur) { + // frequency was changed by someone else? + // Removing printk to prevent dmesg flooding while using CPU Master or other 3rd Party Cpu freq profilers + //printk(KERN_WARNING "Smartass: frequency changed by 3rd party: %d to %d\n", + // old_freq,policy->cur); + new_freq = old_freq; + } + else if (ramp_dir > 0 && nr_running() > 1) { + // ramp up logic: + if (old_freq < this_asswax->ideal_speed) + new_freq = this_asswax->ideal_speed; + else if (ramp_up_step) { + new_freq = old_freq + ramp_up_step; + relation = CPUFREQ_RELATION_H; + } + else { + new_freq = policy->max; + relation = CPUFREQ_RELATION_H; + } + dprintk(ASSWAX_DEBUG_ALG,"asswaxQ @ %d ramp up: ramp_dir=%d ideal=%d\n", + old_freq,ramp_dir,this_asswax->ideal_speed); + } + else if (ramp_dir < 0) { + // ramp down logic: + if (old_freq > this_asswax->ideal_speed) { + new_freq = this_asswax->ideal_speed; + relation = CPUFREQ_RELATION_H; + } + else if (ramp_down_step) + new_freq = old_freq - ramp_down_step; + else { + // Load heuristics: Adjust new_freq such that, assuming a linear + // scaling of load vs. frequency, the load in the new frequency + // will be max_cpu_load: + new_freq = old_freq * this_asswax->cur_cpu_load / max_cpu_load; + if (new_freq > old_freq) // min_cpu_load > max_cpu_load ?! + new_freq = old_freq -1; + } + dprintk(ASSWAX_DEBUG_ALG,"asswaxQ @ %d ramp down: ramp_dir=%d ideal=%d\n", + old_freq,ramp_dir,this_asswax->ideal_speed); + } + else { // ramp_dir==0 ?! Could the timer change its mind about a queued ramp up/down + // before the work task gets to run? 
+ // This may also happen if we refused to ramp up because the nr_running()==1 + new_freq = old_freq; + dprintk(ASSWAX_DEBUG_ALG,"asswaxQ @ %d nothing: ramp_dir=%d nr_running=%lu\n", + old_freq,ramp_dir,nr_running()); + } + + // do actual ramp up (returns 0, if frequency change failed): + new_freq = target_freq(policy,this_asswax,new_freq,old_freq,relation); + if (new_freq) + this_asswax->freq_change_time_in_idle = + get_cpu_idle_time_us(cpu,&this_asswax->freq_change_time); + + // reset timer: + if (new_freq < policy->max) + reset_timer(cpu,this_asswax); + // if we are maxed out, it is pointless to use the timer + // (idle cycles wake up the timer when the timer comes) + else if (timer_pending(&this_asswax->timer)) + del_timer(&this_asswax->timer); + + cpufreq_notify_utilization(policy, + (this_asswax->cur_cpu_load * policy->cur) / policy->max); + } +} + +#ifdef DEBUG +static ssize_t show_debug_mask(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", debug_mask); +} + +static ssize_t store_debug_mask(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0) + debug_mask = input; + return count; +} +#endif + +static ssize_t show_up_rate_us(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", up_rate_us); +} + +static ssize_t store_up_rate_us(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0 && input <= 100000000) + up_rate_us = input; + return count; +} + +static ssize_t show_down_rate_us(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", down_rate_us); +} + +static ssize_t store_down_rate_us(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0 && input <= 100000000) + down_rate_us = input; + return count; +} + +static ssize_t show_sleep_ideal_freq(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", sleep_ideal_freq); +} + +static ssize_t store_sleep_ideal_freq(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) { + sleep_ideal_freq = input; + if (asswax_state == 0) + asswax_update_min_max_allcpus(); + } + return count; +} + +static ssize_t show_sleep_wakeup_freq(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", sleep_wakeup_freq); +} + +static ssize_t store_sleep_wakeup_freq(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) + sleep_wakeup_freq = input; + return count; +} + +static ssize_t show_awake_ideal_freq(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", awake_ideal_freq); +} + +static ssize_t store_awake_ideal_freq(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) { + awake_ideal_freq = input; + if (asswax_state == 1) + asswax_update_min_max_allcpus(); + } + return 
count; +} + +static ssize_t show_interactive_ideal_freq(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", interactive_ideal_freq); +} + +static ssize_t store_interactive_ideal_freq(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) { + interactive_ideal_freq = input; + if (asswax_state == 1) + asswax_update_min_max_allcpus(); + } + return count; +} + +static ssize_t show_interactive_timeout_jiffies(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", interactive_timeout); +} + +static ssize_t store_interactive_timeout_jiffies(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) { + interactive_timeout = input; + if (asswax_state == 1) + asswax_update_min_max_allcpus(); + } + return count; +} + +static ssize_t show_sample_rate_jiffies(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", sample_rate_jiffies); +} + +static ssize_t store_sample_rate_jiffies(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input > 0 && input <= 1000) + sample_rate_jiffies = input; + return count; +} + +static ssize_t show_ramp_up_step(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", ramp_up_step); +} + +static ssize_t store_ramp_up_step(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) + ramp_up_step = input; + return count; +} + +static ssize_t show_ramp_down_step(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", ramp_down_step); +} + +static ssize_t store_ramp_down_step(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input >= 0) + ramp_down_step = input; + return count; +} + +static ssize_t show_max_cpu_load(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", max_cpu_load); +} + +static ssize_t store_max_cpu_load(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input > 0 && input <= 100) + max_cpu_load = input; + return count; +} + +static ssize_t show_min_cpu_load(struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", min_cpu_load); +} + +static ssize_t store_min_cpu_load(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) +{ + ssize_t res; + unsigned long input; + res = strict_strtoul(buf, 0, &input); + if (res >= 0 && input > 0 && input < 100) + min_cpu_load = input; + return count; +} + +#define define_global_rw_attr(_name) \ +static struct global_attr _name##_attr = \ + __ATTR(_name, 0644, show_##_name, store_##_name) + +#ifdef DEBUG +define_global_rw_attr(debug_mask); +#endif +define_global_rw_attr(up_rate_us); +define_global_rw_attr(down_rate_us); +define_global_rw_attr(sleep_ideal_freq); +define_global_rw_attr(sleep_wakeup_freq); 
+define_global_rw_attr(awake_ideal_freq); +define_global_rw_attr(interactive_ideal_freq); +define_global_rw_attr(interactive_timeout_jiffies); +define_global_rw_attr(sample_rate_jiffies); +define_global_rw_attr(ramp_up_step); +define_global_rw_attr(ramp_down_step); +define_global_rw_attr(max_cpu_load); +define_global_rw_attr(min_cpu_load); + +static struct attribute * asswax_attributes[] = { +#ifdef DEBUG + &debug_mask_attr.attr, +#endif + &up_rate_us_attr.attr, + &down_rate_us_attr.attr, + &sleep_ideal_freq_attr.attr, + &sleep_wakeup_freq_attr.attr, + &awake_ideal_freq_attr.attr, + &interactive_ideal_freq_attr.attr, + &interactive_timeout_jiffies_attr.attr, + &sample_rate_jiffies_attr.attr, + &ramp_up_step_attr.attr, + &ramp_down_step_attr.attr, + &max_cpu_load_attr.attr, + &min_cpu_load_attr.attr, + NULL, +}; + +static struct attribute_group asswax_attr_group = { + .attrs = asswax_attributes, + .name = "asswax", +}; + +static int cpufreq_governor_asswax(struct cpufreq_policy *new_policy, + unsigned int event) +{ + unsigned int cpu = new_policy->cpu; + int rc; + struct asswax_info_s *this_asswax = &per_cpu(asswax_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!new_policy->cur)) + return -EINVAL; + + this_asswax->cur_policy = new_policy; + + this_asswax->enable = 1; + + asswax_update_min_max(this_asswax,new_policy,asswax_state); + + this_asswax->freq_table = cpufreq_frequency_get_table(cpu); + if (!this_asswax->freq_table) + printk(KERN_WARNING "Smartass: no frequency table for cpu %d?!\n",cpu); + + // Do not register the idle hook and create sysfs + // entries if we have already done so. + if (atomic_inc_return(&active_count) <= 1) { + rc = sysfs_create_group(cpufreq_global_kobject, + &asswax_attr_group); + if (rc) + return rc; + + idle_notifier_register(&cpufreq_idle_nb); + } + + //if (this_asswax->cur_policy->cur < new_policy->max && !timer_pending(&this_asswax->timer)) + if (!timer_pending(&this_asswax->timer)) + reset_timer(cpu,this_asswax); + + break; + + case CPUFREQ_GOV_LIMITS: + asswax_update_min_max(this_asswax,new_policy,asswax_state); + + if (this_asswax->cur_policy->cur > new_policy->max) { + dprintk(ASSWAX_DEBUG_JUMPS,"SmartassI: jumping to new max freq: %d\n",new_policy->max); + __cpufreq_driver_target(this_asswax->cur_policy, + new_policy->max, CPUFREQ_RELATION_H); + } + else if (this_asswax->cur_policy->cur < new_policy->min) { + dprintk(ASSWAX_DEBUG_JUMPS,"SmartassI: jumping to new min freq: %d\n",new_policy->min); + __cpufreq_driver_target(this_asswax->cur_policy, + new_policy->min, CPUFREQ_RELATION_L); + } + + if (this_asswax->cur_policy->cur < new_policy->max && !timer_pending(&this_asswax->timer)) + reset_timer(cpu,this_asswax); + + break; + + case CPUFREQ_GOV_STOP: + this_asswax->enable = 0; + del_timer(&this_asswax->timer); + flush_work(&freq_scale_work); + this_asswax->idle_exit_time = 0; + + if (atomic_dec_return(&active_count) < 1) { + sysfs_remove_group(cpufreq_global_kobject, + &asswax_attr_group); + idle_notifier_unregister(&cpufreq_idle_nb); + } + break; + } + + return 0; +} + +static void asswax_suspend(int cpu, int suspend) +{ + struct asswax_info_s *this_asswax = &per_cpu(asswax_info, smp_processor_id()); + struct cpufreq_policy *policy = this_asswax->cur_policy; + unsigned int new_freq; + + if (!this_asswax->enable) + return; + + asswax_update_min_max(this_asswax,policy,suspend); + if (!suspend) { // resume at max speed: + new_freq = validate_freq(policy,sleep_wakeup_freq); + + 
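+		/* validate_freq() clamps sleep_wakeup_freq into the policy's
+		 * current [min, max] range before the target call below. */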
dprintk(ASSWAX_DEBUG_JUMPS,"SmartassS: awaking at %d\n",new_freq); + + __cpufreq_driver_target(policy, new_freq, + CPUFREQ_RELATION_L); + } else { + // to avoid wakeup issues with quick sleep/wakeup don't change actual frequency when entering sleep + // to allow some time to settle down. Instead we just reset our statistics (and reset the timer). + // Eventually, the timer will adjust the frequency if necessary. + + this_asswax->freq_change_time_in_idle = + get_cpu_idle_time_us(cpu,&this_asswax->freq_change_time); + + dprintk(ASSWAX_DEBUG_JUMPS,"SmartassS: suspending at %d\n",policy->cur); + } + + reset_timer(smp_processor_id(),this_asswax); +} + +static void asswax_early_suspend(struct early_suspend *handler) { + int i; + if (asswax_state == 0 || sleep_ideal_freq==0) // disable behavior for sleep_ideal_freq==0 + return; + asswax_state = 0; + for_each_online_cpu(i) + asswax_suspend(i,0); +} + +static void asswax_late_resume(struct early_suspend *handler) { + int i; + if (asswax_state > 0) // already not suspended so nothing to do + return; + asswax_state = 1; + for_each_online_cpu(i) + asswax_suspend(i,1); +} + +static struct early_suspend asswax_power_suspend = { + .suspend = asswax_early_suspend, + .resume = asswax_late_resume, +#ifdef CONFIG_MACH_HERO + .level = EARLY_SUSPEND_LEVEL_DISABLE_FB + 1, +#endif +}; + +static int __init cpufreq_asswax_init(void) +{ + unsigned int i; + struct asswax_info_s *this_asswax; + + spin_lock_init(&cpumask_lock); + + /* Initalize per-cpu data: */ + for_each_possible_cpu(i) { + this_asswax = &per_cpu(asswax_info, i); + this_asswax->enable = 0; + this_asswax->cur_policy = 0; + this_asswax->ramp_dir = 0; + this_asswax->time_in_idle = 0; + this_asswax->idle_exit_time = 0; + this_asswax->freq_change_time = 0; + this_asswax->freq_change_time_in_idle = 0; + this_asswax->cur_cpu_load = 0; + // intialize timer: + init_timer_deferrable(&this_asswax->timer); + this_asswax->timer.function = cpufreq_asswax_timer; + this_asswax->timer.data = i; + work_cpumask_test_and_clear(i); + } + + // Scale up is high priority + up_wq = create_workqueue("kasswax_up"); + down_wq = create_workqueue("kasswax_down"); + if (!up_wq || !down_wq) + return -ENOMEM; + + INIT_WORK(&freq_scale_work, cpufreq_asswax_freq_change_time_work); + + register_early_suspend(&asswax_power_suspend); + + return cpufreq_register_governor(&cpufreq_gov_asswax); +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ASSWAX +fs_initcall(cpufreq_asswax_init); +#else +module_init(cpufreq_asswax_init); +#endif + +static void __exit cpufreq_asswax_exit(void) +{ + end_interaction_timeout(); + cpufreq_unregister_governor(&cpufreq_gov_asswax); + destroy_workqueue(up_wq); + destroy_workqueue(down_wq); +} + +module_exit(cpufreq_asswax_exit); + +MODULE_AUTHOR ("godmachine81 rewrite- original author of Smartass and Brazilian Wax - Erasmux"); +MODULE_DESCRIPTION ("'cpufreq_asswax' - A combination of Brazilian Wax and Smartass"); +MODULE_LICENSE ("GPL"); \ No newline at end of file diff --git a/drivers/cpufreq/cpufreq_badass.c b/drivers/cpufreq/cpufreq_badass.c index eb3f433a..45b14d1e 100644 --- a/drivers/cpufreq/cpufreq_badass.c +++ b/drivers/cpufreq/cpufreq_badass.c @@ -188,12 +188,12 @@ static struct bds_tuners { #endif }; -static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, - cputime64_t *wall) +static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, + u64 *wall) { - cputime64_t idle_time; - cputime64_t cur_wall_time; - cputime64_t busy_time; + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; cur_wall_time 
= jiffies64_to_cputime64(get_jiffies_64()); @@ -204,11 +204,11 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; - idle_time = (cur_wall_time - busy_time); + idle_time = cur_wall_time - busy_time; if (wall) - *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + *wall = jiffies_to_usecs(cur_wall_time); - return (cputime64_t)jiffies_to_usecs(idle_time); + return jiffies_to_usecs(idle_time); } static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) @@ -830,7 +830,11 @@ int set_three_phase_freq_badass(int cpufreq) static void bds_check_cpu(struct cpu_bds_info_s *this_bds_info) { + /* Extrapolated load of this CPU */ + unsigned int load_at_max_freq = 0; unsigned int max_load_freq; + /* Current load across this CPU */ + unsigned int cur_load = 0; struct cpufreq_policy *policy; unsigned int j; @@ -866,7 +870,7 @@ static void bds_check_cpu(struct cpu_bds_info_s *this_bds_info) struct cpu_bds_info_s *j_bds_info; cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; unsigned int idle_time, wall_time, iowait_time; - unsigned int load, load_freq; + unsigned int load_freq; int freq_avg; j_bds_info = &per_cpu(od_cpu_bds_info, j); @@ -874,20 +878,24 @@ static void bds_check_cpu(struct cpu_bds_info_s *this_bds_info) cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); - wall_time = (unsigned int) (cur_wall_time - j_bds_info->prev_cpu_wall); + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_bds_info->prev_cpu_wall); j_bds_info->prev_cpu_wall = cur_wall_time; - idle_time = (unsigned int) (cur_idle_time - j_bds_info->prev_cpu_idle); + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_bds_info->prev_cpu_idle); j_bds_info->prev_cpu_idle = cur_idle_time; - iowait_time = (unsigned int) (cur_iowait_time - j_bds_info->prev_cpu_iowait); + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + j_bds_info->prev_cpu_iowait); j_bds_info->prev_cpu_iowait = cur_iowait_time; if (bds_tuners_ins.ignore_nice) { cputime64_t cur_nice; unsigned long cur_nice_jiffies; - cur_nice = (kcpustat_cpu(j).cpustat[CPUTIME_NICE] - j_bds_info->prev_cpu_nice); + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_bds_info->prev_cpu_nice; /* * Assumption: nice time between sampling periods will * be less than 2^32 jiffies for 32 bit sys @@ -912,16 +920,20 @@ static void bds_check_cpu(struct cpu_bds_info_s *this_bds_info) if (unlikely(!wall_time || wall_time < idle_time)) continue; - load = 100 * (wall_time - idle_time) / wall_time; + cur_load = 100 * (wall_time - idle_time) / wall_time; freq_avg = __cpufreq_driver_getavg(policy, j); if (freq_avg <= 0) freq_avg = policy->cur; - load_freq = load * freq_avg; + load_freq = cur_load * freq_avg; if (load_freq > max_load_freq) max_load_freq = load_freq; } + /* calculate the scaled load across CPU */ + load_at_max_freq = (cur_load * policy->cur)/policy->cpuinfo.max_freq; + + cpufreq_notify_utilization(policy, load_at_max_freq); /* Check for frequency increase */ if (max_load_freq > bds_tuners_ins.up_threshold * policy->cur) { diff --git a/drivers/cpufreq/cpufreq_dancedance.c b/drivers/cpufreq/cpufreq_dancedance.c new file mode 100644 index 00000000..0811bd95 --- /dev/null +++ b/drivers/cpufreq/cpufreq_dancedance.c @@ -0,0 +1,650 @@ +/* + * drivers/cpufreq/cpufreq_dancedance.c + * + * Copyright (C) 2012 Shaun Nuzzo + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_UP_THRESHOLD (90) +#define DEF_FREQUENCY_DOWN_THRESHOLD (30) +#define MIN_SAMPLING_RATE_RATIO (2) + +static unsigned int min_sampling_rate; + +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (10) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_DANCEDANCE +static +#endif +struct cpufreq_governor cpufreq_gov_dancedance = { + .name = "dancedance", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct cpufreq_frequency_table *freq_table; + unsigned int down_skip; + unsigned int requested_freq; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + int cpu; + unsigned int sample_type:1; + unsigned long long prev_idletime; + unsigned long long prev_idleusage; + unsigned int enable:1; + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. 
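+ * (Each CPU's sampling work is serialized separately through its own
+ *  cpu_dbs_info_s.timer_mutex.)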
+ */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int powersave_bias; + unsigned int io_is_busy; + unsigned int target_residency; + unsigned int allowed_misses; + unsigned int freq_step; + unsigned int down_threshold; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .ignore_nice = 0, + .freq_step = 5, +}; + +static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, + u64 *wall) +{ + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + + idle_time = cur_wall_time - busy_time; + if (wall) + *wall = jiffies_to_usecs(cur_wall_time); + + return jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + return idle_time; +} + +/* keep track of frequency transitions */ +static int +dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info, + freq->cpu); + + struct cpufreq_policy *policy; + + if (!this_dbs_info->enable) + return 0; + + policy = this_dbs_info->cur_policy; + + /* + * we only care if our internally tracked freq moves outside + * the 'valid' ranges of freqency available to us otherwise + * we do not change it + */ + if (this_dbs_info->requested_freq > policy->max + || this_dbs_info->requested_freq < policy->min) + this_dbs_info->requested_freq = freq->new; + + return 0; +} + +static struct notifier_block dbs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier +}; + +/************************** sysfs interface ************************/ +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_dancedance Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(sampling_down_factor, sampling_down_factor); +show_one(up_threshold, up_threshold); +show_one(down_threshold, down_threshold); +show_one(ignore_nice_load, ignore_nice); +show_one(freq_step, freq_step); + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + + dbs_tuners_ins.sampling_down_factor = input; + return count; +} + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, 
size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > 100 || + input <= dbs_tuners_ins.down_threshold) + return -EINVAL; + + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_down_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + /* cannot be lower than 11 otherwise freq will not fall */ + if (ret != 1 || input < 11 || input > 100 || + input >= dbs_tuners_ins.up_threshold) + return -EINVAL; + + dbs_tuners_ins.down_threshold = input; + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */ + return count; + + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(cs_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + return count; +} + +static ssize_t store_freq_step(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 100) + input = 100; + + /* no need to test here if freq_step is zero as the user might actually + * want this, they would be crazy though :) */ + dbs_tuners_ins.freq_step = input; + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(up_threshold); +define_one_global_rw(down_threshold); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(freq_step); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice_load.attr, + &freq_step.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "dancedance", +}; + +/************************** sysfs end ************************/ + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int load = 0; + unsigned int max_load = 0; + unsigned int freq_target; + + struct cpufreq_policy *policy; + unsigned int j; + + policy = this_dbs_info->cur_policy; + + /* + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate*sampling_down_factor, we check, if current + * idle time is more than 80%, then we try to decrease frequency + * + * Any frequency increase takes it to the maximum frequency. 
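+	 * (With the defaults defined above, up_threshold 90 and freq_step 5, each
+	 *  sample whose load exceeds 90% raises requested_freq by 5% of
+	 *  policy->max, so a sustained load reaches the maximum over a few
+	 *  samples rather than in one jump.)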
+ * Frequency reduction happens at minimum steps of + * 5% (default) of maximum frequency + */ + + /* Get Absolute Load */ + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time; + unsigned int idle_time, wall_time; + + j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + + wall_time = (unsigned int) (cur_wall_time - j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) (cur_idle_time - j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_dbs_info->prev_cpu_nice; + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + if (load > max_load) + max_load = load; + } + + /* + * break out if we 'cannot' reduce the speed as the user might + * want freq_step to be zero + */ + if (dbs_tuners_ins.freq_step == 0) + return; + + /* Check for frequency increase */ + if (max_load > dbs_tuners_ins.up_threshold) { + this_dbs_info->down_skip = 0; + + /* if we are already at full speed then break out early */ + if (this_dbs_info->requested_freq == policy->max) + return; + + freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows.... */ + if (unlikely(freq_target == 0)) + freq_target = 5; + + this_dbs_info->requested_freq += freq_target; + if (this_dbs_info->requested_freq > policy->max) + this_dbs_info->requested_freq = policy->max; + + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. 
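+	 * (Here, with down_threshold at its default of 30, a decrease is only
+	 *  attempted once max_load drops below 20; requested_freq is then lowered
+	 *  by freq_step percent of policy->max, 5% by default, but never below
+	 *  policy->min.)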
+ */ + if (max_load < (dbs_tuners_ins.down_threshold - 10)) { + freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; + + this_dbs_info->requested_freq -= freq_target; + if (this_dbs_info->requested_freq < policy->min) + this_dbs_info->requested_freq = policy->min; + + /* + * if we cannot reduce the frequency anymore, break out early + */ + if (policy->cur == policy->min) + return; + + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int sample_type = dbs_info->sample_type; + + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + delay -= jiffies % delay; + + mutex_lock(&dbs_info->timer_mutex); + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + if (!dbs_tuners_ins.powersave_bias || + sample_type == DBS_NORMAL_SAMPLE) { + dbs_check_cpu(dbs_info); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = DBS_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } else { + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + if (num_online_cpus() > 1) + delay -= jiffies % delay; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = dbs_info->freq_lo_jiffies; + } + + schedule_delayed_work_on(cpu, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + dbs_info->enable = 1; + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + dbs_info->enable = 0; + cancel_delayed_work_sync(&dbs_info->work); +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_mutex); + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + } + this_dbs_info->down_skip = 0; + this_dbs_info->requested_freq = policy->cur; + + mutex_init(&this_dbs_info->timer_mutex); + dbs_enable++; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + /* + * conservative does not implement micro like ondemand + * governor, thus we are bound to jiffes/HZ + */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); + + cpufreq_register_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + mutex_unlock(&dbs_mutex); + + dbs_timer_init(this_dbs_info); + + break; + + case CPUFREQ_GOV_STOP: + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + dbs_enable--; + mutex_destroy(&this_dbs_info->timer_mutex); + + /* + * Stop the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 0) + cpufreq_unregister_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + + mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target( + this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target( + this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&this_dbs_info->timer_mutex); + + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_dancedance); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_dancedance); +} + +MODULE_AUTHOR("Shaun Nuzzo "); +MODULE_DESCRIPTION("'cpufreq_dancedance' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors " + "optimised for use in a battery environment" + "Modified code based off conservative with a faster" + "deep sleep rate"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_DANCEDANCE +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_nightmare.c b/drivers/cpufreq/cpufreq_nightmare.c new file mode 100644 index 00000000..ece971ca --- /dev/null +++ b/drivers/cpufreq/cpufreq_nightmare.c @@ -0,0 +1,1656 @@ +/* + * drivers/cpufreq/cpufreq_nightmare.c + * + * Copyright (C) 2011 Samsung Electronics co. ltd + * ByungChang Cha + * + * Based on ondemand governor + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Created by Alucard_24@xda + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HAS_EARLYSUSPEND +#include +#endif +#define EARLYSUSPEND_HOTPLUGLOCK 1 + +/* + * runqueue average + */ + +#define RQ_AVG_TIMER_RATE 10 + +struct runqueue_data { + unsigned int nr_run_avg; + unsigned int update_rate; + int64_t last_time; + int64_t total_time; + struct delayed_work work; + struct workqueue_struct *nr_run_wq; + spinlock_t lock; +}; + +static struct runqueue_data *rq_data; +static void rq_work_fn(struct work_struct *work); + +static void start_rq_work(void) +{ + rq_data->nr_run_avg = 0; + rq_data->last_time = 0; + rq_data->total_time = 0; + if (rq_data->nr_run_wq == NULL) + rq_data->nr_run_wq = + create_singlethread_workqueue("nr_run_avg"); + + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + return; +} + +static void stop_rq_work(void) +{ + if (rq_data->nr_run_wq) + cancel_delayed_work(&rq_data->work); + return; +} + +static int __init init_rq_avg(void) +{ + rq_data = kzalloc(sizeof(struct runqueue_data), GFP_KERNEL); + if (rq_data == NULL) { + pr_err("%s cannot allocate memory\n", __func__); + return -ENOMEM; + } + spin_lock_init(&rq_data->lock); + rq_data->update_rate = RQ_AVG_TIMER_RATE; + INIT_DELAYED_WORK_DEFERRABLE(&rq_data->work, rq_work_fn); + + return 0; +} + +static void rq_work_fn(struct work_struct *work) +{ + int64_t time_diff = 0; + int64_t nr_run = 0; + unsigned long flags = 0; + int64_t cur_time = ktime_to_ns(ktime_get()); + + spin_lock_irqsave(&rq_data->lock, flags); + + if (rq_data->last_time == 0) + rq_data->last_time = cur_time; + if (rq_data->nr_run_avg == 0) + rq_data->total_time = 0; + + nr_run = nr_running() * 100; + time_diff = cur_time - rq_data->last_time; + do_div(time_diff, 1000 * 1000); + + if (time_diff != 0 && rq_data->total_time != 0) { + nr_run = (nr_run * time_diff) + + (rq_data->nr_run_avg * rq_data->total_time); + do_div(nr_run, rq_data->total_time + time_diff); + } + rq_data->nr_run_avg = nr_run; + rq_data->total_time += time_diff; + rq_data->last_time = cur_time; + + if (rq_data->update_rate != 0) + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + + spin_unlock_irqrestore(&rq_data->lock, flags); +} + +static unsigned int get_nr_run_avg(void) +{ + unsigned int nr_run_avg; + unsigned long flags = 0; + + spin_lock_irqsave(&rq_data->lock, flags); + nr_run_avg = rq_data->nr_run_avg; + rq_data->nr_run_avg = 0; + spin_unlock_irqrestore(&rq_data->lock, flags); + + return nr_run_avg; +} + + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_SAMPLING_UP_FACTOR (1) +#define MAX_SAMPLING_UP_FACTOR (100000) +#define DEF_SAMPLING_DOWN_FACTOR (2) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define DEF_FREQ_STEP_DEC (5) + +#define DEF_SAMPLING_RATE (60000) +#define MIN_SAMPLING_RATE (10000) +#define MAX_HOTPLUG_RATE (40u) + +#define DEF_MAX_CPU_LOCK (0) +#define DEF_MIN_CPU_LOCK (0) +#define DEF_UP_NR_CPUS (1) +#define DEF_CPU_UP_RATE (10) +#define DEF_CPU_DOWN_RATE (20) +#define DEF_FREQ_STEP (30) + +#define DEF_START_DELAY (0) + +#define FREQ_FOR_RESPONSIVENESS (918000) + +#define HOTPLUG_DOWN_INDEX (0) +#define HOTPLUG_UP_INDEX (1) + +/* CPU freq will be increased if measured load > inc_cpu_load;*/ +#define DEF_INC_CPU_LOAD (80) +#define 
INC_CPU_LOAD_AT_MIN_FREQ (40) +#define UP_AVG_LOAD (65u) +/* CPU freq will be decreased if measured load < dec_cpu_load;*/ +#define DEF_DEC_CPU_LOAD (60) +#define DOWN_AVG_LOAD (30u) +#define DEF_FREQ_UP_BRAKE (5u) +#define DEF_HOTPLUG_COMPARE_LEVEL (0u) + +#ifdef CONFIG_MACH_MIDAS +static int hotplug_rq[4][2] = { + {0, 100}, {100, 200}, {200, 300}, {300, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 540000}, + {378000, 540000}, + {378000, 540000}, + {378000, 0} +}; +#else +static int hotplug_rq[4][2] = { + {0, 100}, {100, 200}, {200, 300}, {300, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 540000}, + {378000, 540000}, + {378000, 540000}, + {378000, 0} +}; +#endif + +static unsigned int min_sampling_rate; + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_nightmare(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_NIGHTMARE +static +#endif +struct cpufreq_governor cpufreq_gov_nightmare = { + .name = "nightmare", + .governor = cpufreq_governor_nightmare, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpufreq_nightmare_cpuinfo { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct work_struct up_work; + struct work_struct down_work; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_table_maxsize; + unsigned int avg_rate_mult; + int cpu; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpufreq_nightmare_cpuinfo, od_cpu_dbs_info); + +struct workqueue_struct *dvfs_workqueues; + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + + +/* + * dbs_mutex protects dbs_enable in governor start/stop. 
+ */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int freq_step_dec; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int io_is_busy; + /* nightmare tuners */ + unsigned int freq_step; + unsigned int cpu_up_rate; + unsigned int cpu_down_rate; + unsigned int up_nr_cpus; + unsigned int max_cpu_lock; + unsigned int min_cpu_lock; + atomic_t hotplug_lock; + unsigned int dvfs_debug; + unsigned int max_freq; + unsigned int min_freq; +#ifdef CONFIG_HAS_EARLYSUSPEND + int early_suspend; +#endif + unsigned int inc_cpu_load_at_min_freq; + unsigned int freq_for_responsiveness; + unsigned int inc_cpu_load; + unsigned int dec_cpu_load; + unsigned int up_avg_load; + unsigned int down_avg_load; + unsigned int sampling_up_factor; + unsigned int freq_up_brake; + unsigned int hotplug_compare_level; +} dbs_tuners_ins = { + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .freq_step_dec = DEF_FREQ_STEP_DEC, + .ignore_nice = 0, + .freq_step = DEF_FREQ_STEP, + .cpu_up_rate = DEF_CPU_UP_RATE, + .cpu_down_rate = DEF_CPU_DOWN_RATE, + .up_nr_cpus = DEF_UP_NR_CPUS, + .max_cpu_lock = DEF_MAX_CPU_LOCK, + .min_cpu_lock = DEF_MIN_CPU_LOCK, + .hotplug_lock = ATOMIC_INIT(0), + .dvfs_debug = 0, +#ifdef CONFIG_HAS_EARLYSUSPEND + .early_suspend = -1, +#endif + .inc_cpu_load_at_min_freq = INC_CPU_LOAD_AT_MIN_FREQ, + .freq_for_responsiveness = FREQ_FOR_RESPONSIVENESS, + .inc_cpu_load = DEF_INC_CPU_LOAD, + .dec_cpu_load = DEF_DEC_CPU_LOAD, + .up_avg_load = UP_AVG_LOAD, + .down_avg_load = DOWN_AVG_LOAD, + .sampling_up_factor = DEF_SAMPLING_UP_FACTOR, + .freq_up_brake = DEF_FREQ_UP_BRAKE, + .hotplug_compare_level = DEF_HOTPLUG_COMPARE_LEVEL, +}; + + +/* + * CPU hotplug lock interface + */ + +static atomic_t g_hotplug_count = ATOMIC_INIT(0); +static atomic_t g_hotplug_lock = ATOMIC_INIT(0); + +static void apply_hotplug_lock(void) +{ + int online, possible, lock, flag; + struct work_struct *work; + struct cpufreq_nightmare_cpuinfo *dbs_info; + + /* do turn_on/off cpus */ + dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */ + online = num_online_cpus(); + possible = num_possible_cpus(); + lock = atomic_read(&g_hotplug_lock); + flag = lock - online; + + if (lock == 0 || flag == 0) + return; + + work = flag > 0 ? 
&dbs_info->up_work : &dbs_info->down_work; + + pr_debug("%s online %d possible %d lock %d flag %d %d\n", + __func__, online, possible, lock, flag, (int)abs(flag)); + + queue_work_on(dbs_info->cpu, dvfs_workqueues, work); +} + +int cpufreq_nightmare_cpu_lock(int num_core) +{ + int prev_lock; + + if (num_core < 1 || num_core > num_possible_cpus()) + return -EINVAL; + + prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock != 0 && prev_lock < num_core) + return -EINVAL; + else if (prev_lock == num_core) + atomic_inc(&g_hotplug_count); + + atomic_set(&g_hotplug_lock, num_core); + atomic_set(&g_hotplug_count, 1); + apply_hotplug_lock(); + + return 0; +} + +int cpufreq_nightmare_cpu_unlock(int num_core) +{ + int prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock < num_core) + return 0; + else if (prev_lock == num_core) + atomic_dec(&g_hotplug_count); + + if (atomic_read(&g_hotplug_count) == 0) + atomic_set(&g_hotplug_lock, 0); + + return 0; +} + +void cpufreq_nightmare_min_cpu_lock(unsigned int num_core) +{ + int online, flag; + struct cpufreq_nightmare_cpuinfo *dbs_info; + + dbs_tuners_ins.min_cpu_lock = min(num_core, num_possible_cpus()); + + dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */ + online = num_online_cpus(); + flag = (int)num_core - online; + if (flag <= 0) + return; + queue_work_on(dbs_info->cpu, dvfs_workqueues, &dbs_info->up_work); +} + +void cpufreq_nightmare_min_cpu_unlock(void) +{ + int online, lock, flag; + struct cpufreq_nightmare_cpuinfo *dbs_info; + + dbs_tuners_ins.min_cpu_lock = 0; + + dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */ + online = num_online_cpus(); + lock = atomic_read(&g_hotplug_lock); + if (lock == 0) + return; + flag = lock - online; + if (flag >= 0) + return; + queue_work_on(dbs_info->cpu, dvfs_workqueues, &dbs_info->down_work); +} + +/* + * History of CPU usage + */ +struct cpu_usage { + unsigned int freq; + int load[NR_CPUS]; + unsigned int rq_avg; + unsigned int avg_load; +}; + +struct cpu_usage_history { + struct cpu_usage usage[MAX_HOTPLUG_RATE]; + unsigned int num_hist; +}; + +struct cpu_usage_history *hotplug_histories; + +static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) +{ + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + + idle_time = cur_wall_time - busy_time; + if (wall) + *wall = jiffies_to_usecs(cur_wall_time); + + return jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, + cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + 
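+/*
+ * The show_one()/define_one_global_rw() macros below follow the usual cpufreq
+ * governor sysfs pattern: each tunable gets a show_<name>() handler that
+ * prints the matching dbs_tuners_ins field and, for writable tunables, a
+ * store_<name>() handler that parses and range-checks user input. The
+ * resulting attributes are grouped under the "nightmare" directory on the
+ * global cpufreq kobject (typically
+ * /sys/devices/system/cpu/cpufreq/nightmare/); for example, writing 100000
+ * to sampling_rate there requests a 100 ms sampling period, clamped to no
+ * less than min_sampling_rate.
+ */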
+define_one_global_ro(sampling_rate_min); + +/* cpufreq_nightmare Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(sampling_down_factor, sampling_down_factor); +show_one(ignore_nice_load, ignore_nice); +show_one(freq_step_dec, freq_step_dec); +show_one(freq_step, freq_step); +show_one(cpu_up_rate, cpu_up_rate); +show_one(cpu_down_rate, cpu_down_rate); +show_one(up_nr_cpus, up_nr_cpus); +show_one(max_cpu_lock, max_cpu_lock); +show_one(min_cpu_lock, min_cpu_lock); +show_one(dvfs_debug, dvfs_debug); +show_one(inc_cpu_load_at_min_freq, inc_cpu_load_at_min_freq); +show_one(freq_for_responsiveness, freq_for_responsiveness); +show_one(inc_cpu_load, inc_cpu_load); +show_one(dec_cpu_load, dec_cpu_load); +show_one(up_avg_load, up_avg_load); +show_one(down_avg_load, down_avg_load); +show_one(sampling_up_factor, sampling_up_factor); +show_one(freq_up_brake, freq_up_brake); +show_one(hotplug_compare_level,hotplug_compare_level); + +static ssize_t show_hotplug_lock(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", atomic_read(&g_hotplug_lock)); +} + +#define show_hotplug_param(file_name, num_core, up_down) \ +static ssize_t show_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", file_name[num_core - 1][up_down]); \ +} + +#define store_hotplug_param(file_name, num_core, up_down) \ +static ssize_t store_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, \ + const char *buf, size_t count) \ +{ \ + unsigned int input; \ + int ret; \ + ret = sscanf(buf, "%u", &input); \ + if (ret != 1) \ + return -EINVAL; \ + file_name[num_core - 1][up_down] = input; \ + return count; \ +} + +show_hotplug_param(hotplug_freq, 1, 1); +show_hotplug_param(hotplug_freq, 2, 0); +#ifndef CONFIG_CPU_EXYNOS4210 +show_hotplug_param(hotplug_freq, 2, 1); +show_hotplug_param(hotplug_freq, 3, 0); +show_hotplug_param(hotplug_freq, 3, 1); +show_hotplug_param(hotplug_freq, 4, 0); +#endif + +show_hotplug_param(hotplug_rq, 1, 1); +show_hotplug_param(hotplug_rq, 2, 0); +#ifndef CONFIG_CPU_EXYNOS4210 +show_hotplug_param(hotplug_rq, 2, 1); +show_hotplug_param(hotplug_rq, 3, 0); +show_hotplug_param(hotplug_rq, 3, 1); +show_hotplug_param(hotplug_rq, 4, 0); +#endif + +store_hotplug_param(hotplug_freq, 1, 1); +store_hotplug_param(hotplug_freq, 2, 0); +#ifndef CONFIG_CPU_EXYNOS4210 +store_hotplug_param(hotplug_freq, 2, 1); +store_hotplug_param(hotplug_freq, 3, 0); +store_hotplug_param(hotplug_freq, 3, 1); +store_hotplug_param(hotplug_freq, 4, 0); +#endif + +store_hotplug_param(hotplug_rq, 1, 1); +store_hotplug_param(hotplug_rq, 2, 0); +#ifndef CONFIG_CPU_EXYNOS4210 +store_hotplug_param(hotplug_rq, 2, 1); +store_hotplug_param(hotplug_rq, 3, 0); +store_hotplug_param(hotplug_rq, 3, 1); +store_hotplug_param(hotplug_rq, 4, 0); +#endif + +define_one_global_rw(hotplug_freq_1_1); +define_one_global_rw(hotplug_freq_2_0); +#ifndef CONFIG_CPU_EXYNOS4210 +define_one_global_rw(hotplug_freq_2_1); +define_one_global_rw(hotplug_freq_3_0); +define_one_global_rw(hotplug_freq_3_1); +define_one_global_rw(hotplug_freq_4_0); +#endif + +define_one_global_rw(hotplug_rq_1_1); +define_one_global_rw(hotplug_rq_2_0); +#ifndef CONFIG_CPU_EXYNOS4210 
+define_one_global_rw(hotplug_rq_2_1); +define_one_global_rw(hotplug_rq_3_0); +define_one_global_rw(hotplug_rq_3_1); +define_one_global_rw(hotplug_rq_4_0); +#endif + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + dbs_tuners_ins.sampling_down_factor = input; + + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpufreq_nightmare_cpuinfo *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = + get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + return count; +} + +static ssize_t store_freq_step_dec(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_step_dec = min(input, 100u); + return count; +} + +static ssize_t store_freq_step(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_step = min(input, 100u); + return count; +} + +static ssize_t store_cpu_up_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_up_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + +static ssize_t store_cpu_down_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_down_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + +static ssize_t store_up_nr_cpus(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.up_nr_cpus = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_max_cpu_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.max_cpu_lock = min(input, 
num_possible_cpus()); + return count; +} + +static ssize_t store_min_cpu_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + if (input == 0) + cpufreq_nightmare_min_cpu_unlock(); + else + cpufreq_nightmare_min_cpu_lock(input); + return count; +} + +static ssize_t store_hotplug_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + int prev_lock; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + input = min(input, num_possible_cpus()); + prev_lock = atomic_read(&dbs_tuners_ins.hotplug_lock); + + if (prev_lock) + cpufreq_nightmare_cpu_unlock(prev_lock); + + if (input == 0) { + atomic_set(&dbs_tuners_ins.hotplug_lock, 0); + return count; + } + + ret = cpufreq_nightmare_cpu_lock(input); + if (ret) { + printk(KERN_ERR "[HOTPLUG] already locked with smaller value %d < %d\n", + atomic_read(&g_hotplug_lock), input); + return ret; + } + + atomic_set(&dbs_tuners_ins.hotplug_lock, input); + + return count; +} + +static ssize_t store_dvfs_debug(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.dvfs_debug = input > 0; + return count; +} + +static ssize_t store_inc_cpu_load_at_min_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > 100) { + return -EINVAL; + } + dbs_tuners_ins.inc_cpu_load_at_min_freq = min(input,dbs_tuners_ins.inc_cpu_load); + return count; +} + +static ssize_t store_freq_for_responsiveness(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_for_responsiveness = input; + return count; +} + +/* inc_cpu_load */ +static ssize_t store_inc_cpu_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.inc_cpu_load = max(min(input,100u),10u); + return count; +} + +/* dec_cpu_load */ +static ssize_t store_dec_cpu_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.dec_cpu_load = max(min(input,95u),5u); + return count; +} + +/* up_avg_load */ +static ssize_t store_up_avg_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.up_avg_load = max(min(input,100u),10u); + return count; +} + +/* down_avg_load */ +static ssize_t store_down_avg_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.down_avg_load = max(min(input,95u),5u); + return count; +} + +/* sampling_up_factor */ +static ssize_t store_sampling_up_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > 
MAX_SAMPLING_UP_FACTOR || input < 1) + return -EINVAL; + dbs_tuners_ins.sampling_up_factor = input; + + return count; +} + +/* freq_up_brake */ +static ssize_t store_freq_up_brake(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1 || input < 0 || input > 100) + return -EINVAL; + + if (input == dbs_tuners_ins.freq_up_brake) { /* nothing to do */ + return count; + } + + dbs_tuners_ins.freq_up_brake = input; + + return count; +} + +/* hotplug_compare_level */ +static ssize_t store_hotplug_compare_level(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1 || input < 0 || input > 1) + return -EINVAL; + + if (input == dbs_tuners_ins.hotplug_compare_level) { /* nothing to do */ + return count; + } + + dbs_tuners_ins.hotplug_compare_level = input; + + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(freq_step_dec); +define_one_global_rw(freq_step); +define_one_global_rw(cpu_up_rate); +define_one_global_rw(cpu_down_rate); +define_one_global_rw(up_nr_cpus); +define_one_global_rw(max_cpu_lock); +define_one_global_rw(min_cpu_lock); +define_one_global_rw(hotplug_lock); +define_one_global_rw(dvfs_debug); +define_one_global_rw(inc_cpu_load_at_min_freq); +define_one_global_rw(freq_for_responsiveness); +define_one_global_rw(inc_cpu_load); +define_one_global_rw(dec_cpu_load); +define_one_global_rw(up_avg_load); +define_one_global_rw(down_avg_load); +define_one_global_rw(sampling_up_factor); +define_one_global_rw(freq_up_brake); +define_one_global_rw(hotplug_compare_level); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &io_is_busy.attr, + &freq_step_dec.attr, + &freq_step.attr, + &cpu_up_rate.attr, + &cpu_down_rate.attr, + &up_nr_cpus.attr, + /* priority: hotplug_lock > max_cpu_lock > min_cpu_lock + Exception: hotplug_lock on early_suspend uses min_cpu_lock */ + &max_cpu_lock.attr, + &min_cpu_lock.attr, + &hotplug_lock.attr, + &dvfs_debug.attr, + &hotplug_freq_1_1.attr, + &hotplug_freq_2_0.attr, +#ifndef CONFIG_CPU_EXYNOS4210 + &hotplug_freq_2_1.attr, + &hotplug_freq_3_0.attr, + &hotplug_freq_3_1.attr, + &hotplug_freq_4_0.attr, +#endif + &hotplug_rq_1_1.attr, + &hotplug_rq_2_0.attr, +#ifndef CONFIG_CPU_EXYNOS4210 + &hotplug_rq_2_1.attr, + &hotplug_rq_3_0.attr, + &hotplug_rq_3_1.attr, + &hotplug_rq_4_0.attr, +#endif + &inc_cpu_load_at_min_freq.attr, + &freq_for_responsiveness.attr, + &inc_cpu_load.attr, + &dec_cpu_load.attr, + &up_avg_load.attr, + &down_avg_load.attr, + &sampling_up_factor.attr, + &freq_up_brake.attr, + &hotplug_compare_level.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "nightmare", +}; + +/************************** sysfs end ************************/ + +static void __ref cpu_up_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_up = dbs_tuners_ins.up_nr_cpus; + int min_cpu_lock = dbs_tuners_ins.min_cpu_lock; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock && min_cpu_lock) + nr_up = max(hotplug_lock, min_cpu_lock) - online; + else if (hotplug_lock) + nr_up = hotplug_lock - online; + else if 
(min_cpu_lock) + nr_up = max(nr_up, min_cpu_lock - online); + + if (online == 1) { + printk(KERN_ERR "CPU_UP 3\n"); + cpu_up(num_possible_cpus() - 1); + nr_up -= 1; + } + + for_each_cpu_not(cpu, cpu_online_mask) { + if (nr_up-- == 0) + break; + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_UP %d\n", cpu); + cpu_up(cpu); + } +} + +static void cpu_down_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_down = 1; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock) + nr_down = online - hotplug_lock; + + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_DOWN %d\n", cpu); + cpu_down(cpu); + if (--nr_down == 0) + break; + } +} + +static void debug_hotplug_check(int which, int rq_avg, int freq, + struct cpu_usage *usage) +{ + int cpu; + printk(KERN_ERR "CHECK %s rq %d.%02d freq %d [", which ? "up" : "down", + rq_avg / 100, rq_avg % 100, freq); + for_each_online_cpu(cpu) { + printk(KERN_ERR "(%d, %d), ", cpu, usage->load[cpu]); + } + printk(KERN_ERR "]\n"); +} + +static int check_up(void) +{ + int num_hist = hotplug_histories->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int avg_load; + int i; + int up_rate = dbs_tuners_ins.cpu_up_rate; + unsigned int up_avg_load = dbs_tuners_ins.up_avg_load; + unsigned int hotplug_compare_level = dbs_tuners_ins.hotplug_compare_level; + int up_freq, up_rq; + int min_freq = INT_MAX; + int min_rq_avg = INT_MAX; + int min_avg_load = INT_MAX; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + up_freq = hotplug_freq[online - 1][HOTPLUG_UP_INDEX]; + up_rq = hotplug_rq[online - 1][HOTPLUG_UP_INDEX]; + + if (online == num_possible_cpus()) + return 0; + + if (dbs_tuners_ins.max_cpu_lock != 0 + && online >= dbs_tuners_ins.max_cpu_lock) + return 0; + + if (dbs_tuners_ins.min_cpu_lock != 0 + && online < dbs_tuners_ins.min_cpu_lock) + return 1; + + if (num_hist == 0 || num_hist % up_rate) + return 0; + + if (hotplug_compare_level == 0) { + for (i = num_hist - 1; i >= num_hist - up_rate; --i) { + usage = &hotplug_histories->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + avg_load = usage->avg_load; + + min_freq = min(min_freq, freq); + min_rq_avg = min(min_rq_avg, rq_avg); + min_avg_load = min(min_avg_load, avg_load); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(1, rq_avg, freq, usage); + } + } else { + usage = &hotplug_histories->usage[num_hist - 1]; + min_freq = usage->freq; + min_rq_avg = usage->rq_avg; + min_avg_load = usage->avg_load; + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(1, min_rq_avg, min_freq, usage); + } + + if (min_freq >= up_freq && min_rq_avg > up_rq) { + if (online >= 1) { + if (min_avg_load < up_avg_load) + return 0; + } + printk(KERN_ERR "[HOTPLUG IN] %s %d>=%d && %d>%d\n", + __func__, min_freq, up_freq, min_rq_avg, up_rq); + hotplug_histories->num_hist = 0; + return 1; + } + return 0; +} + +static int check_down(void) +{ + int num_hist = hotplug_histories->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int avg_load; + int i; + int down_rate = dbs_tuners_ins.cpu_down_rate; + unsigned int down_avg_load = dbs_tuners_ins.down_avg_load; + unsigned int hotplug_compare_level = dbs_tuners_ins.hotplug_compare_level; + int down_freq, down_rq; + int max_freq = 0; + int max_rq_avg = 0; + int max_avg_load = 0; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = 
num_online_cpus(); + down_freq = hotplug_freq[online - 1][HOTPLUG_DOWN_INDEX]; + down_rq = hotplug_rq[online - 1][HOTPLUG_DOWN_INDEX]; + + if (online == 1) + return 0; + + if (dbs_tuners_ins.max_cpu_lock != 0 + && online > dbs_tuners_ins.max_cpu_lock) + return 1; + + if (dbs_tuners_ins.min_cpu_lock != 0 + && online <= dbs_tuners_ins.min_cpu_lock) + return 0; + + if (num_hist == 0 || num_hist % down_rate) + return 0; + + if (hotplug_compare_level == 0) { + for (i = num_hist - 1; i >= num_hist - down_rate; --i) { + usage = &hotplug_histories->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + avg_load = usage->avg_load; + + max_freq = max(max_freq, freq); + max_rq_avg = max(max_rq_avg, rq_avg); + max_avg_load = max(max_avg_load, avg_load); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(0, rq_avg, freq, usage); + } + } else { + usage = &hotplug_histories->usage[num_hist - 1]; + max_freq = usage->freq; + max_rq_avg = usage->rq_avg; + max_avg_load = usage->avg_load; + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(0, max_rq_avg, max_freq, usage); + } + + if ((max_freq <= down_freq && max_rq_avg <= down_rq) || (online >= 2 && max_avg_load < down_avg_load)) { + printk(KERN_ERR "[HOTPLUG OUT] %s %d<=%d && %d<%d\n", + __func__, max_freq, down_freq, max_rq_avg, down_rq); + hotplug_histories->num_hist = 0; + return 1; + } + + return 0; +} + +static void dbs_check_cpu(struct cpufreq_nightmare_cpuinfo *this_dbs_info) +{ + struct cpufreq_policy *policy; + unsigned int j; + int num_hist = hotplug_histories->num_hist; + int max_hotplug_rate = max(dbs_tuners_ins.cpu_up_rate,dbs_tuners_ins.cpu_down_rate); + int inc_cpu_load = dbs_tuners_ins.inc_cpu_load; + int dec_cpu_load = dbs_tuners_ins.dec_cpu_load; + unsigned int avg_rate_mult = 0; + + /* add total_load, avg_load to get average load */ + unsigned int total_load = 0; + unsigned int avg_load = 0; + int rq_avg = 0; + policy = this_dbs_info->cur_policy; + + hotplug_histories->usage[num_hist].freq = policy->cur; + hotplug_histories->usage[num_hist].rq_avg = get_nr_run_avg(); + + /* add total_load, avg_load to get average load */ + rq_avg = hotplug_histories->usage[num_hist].rq_avg; + + ++hotplug_histories->num_hist; + + for_each_cpu(j, policy->cpus) { + struct cpufreq_nightmare_cpuinfo *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + cputime64_t prev_wall_time, prev_idle_time, prev_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + int load; + //int freq_avg; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + + prev_wall_time = j_dbs_info->prev_cpu_wall; + prev_idle_time = j_dbs_info->prev_cpu_idle; + prev_iowait_time = j_dbs_info->prev_cpu_iowait; + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + prev_wall_time); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + prev_idle_time); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + prev_iowait_time); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + u64 cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_dbs_info->prev_cpu_nice; + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + 
j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + if (cpu_online(j)) { + total_load += load; + hotplug_histories->usage[num_hist].load[j] = load; + } else { + hotplug_histories->usage[num_hist].load[j] = -1; + } + + } + /* calculate the average load across all related CPUs */ + avg_load = total_load / num_online_cpus(); + hotplug_histories->usage[num_hist].avg_load = avg_load; + + /* Check for CPU hotplug */ + if (check_up()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueues,&this_dbs_info->up_work); + } + else if (check_down()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueues,&this_dbs_info->down_work); + } + if (hotplug_histories->num_hist == max_hotplug_rate) + hotplug_histories->num_hist = 0; + + /* CPUs Online Scale Frequency*/ + for_each_cpu(j, policy->cpus) { + struct cpufreq_nightmare_cpuinfo *j_dbs_info; + int load; + int index; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + + if (cpu_online(j)) { + index = 0; + load = hotplug_histories->usage[num_hist].load[j]; + // just a tips to scale up the frequency fastly + if (j_dbs_info->cur_policy->cur < dbs_tuners_ins.freq_for_responsiveness) + inc_cpu_load = dbs_tuners_ins.inc_cpu_load_at_min_freq; + else + inc_cpu_load = dbs_tuners_ins.inc_cpu_load; + + // Check for frequency increase or for frequency decrease + if (load >= inc_cpu_load) { + unsigned int inc_load = (load * j_dbs_info->cur_policy->min) / 100; + unsigned int inc_step = (dbs_tuners_ins.freq_step * j_dbs_info->cur_policy->min) / 100; + unsigned int inc; + unsigned int freq_up = 0; + + avg_rate_mult += dbs_tuners_ins.sampling_up_factor; + + // if we cannot increment the frequency anymore, break out early + if (j_dbs_info->cur_policy->cur == j_dbs_info->cur_policy->max) { + continue; + } + + inc = inc_load + inc_step; + inc -= (dbs_tuners_ins.freq_up_brake * j_dbs_info->cur_policy->min) / 100; + + freq_up = min(j_dbs_info->cur_policy->max,j_dbs_info->cur_policy->cur + inc); + + if (freq_up != j_dbs_info->cur_policy->cur) { + __cpufreq_driver_target(j_dbs_info->cur_policy, freq_up, CPUFREQ_RELATION_L); + } + + } + else if (load < dec_cpu_load && load > -1) { + unsigned int dec_load = ((100 - load) * (j_dbs_info->cur_policy->min)) / 100; + unsigned int dec_step = (dbs_tuners_ins.freq_step_dec * (j_dbs_info->cur_policy->min)) / 100; + unsigned int dec; + unsigned int freq_down = 0; + + avg_rate_mult += dbs_tuners_ins.sampling_down_factor; + + // if we cannot reduce the frequency anymore, break out early + if (j_dbs_info->cur_policy->cur == j_dbs_info->cur_policy->min) { + continue; + } + + dec = dec_load + dec_step; + + freq_down = max(j_dbs_info->cur_policy->min,j_dbs_info->cur_policy->cur - dec); + + if (freq_down != j_dbs_info->cur_policy->cur) { + __cpufreq_driver_target(j_dbs_info->cur_policy, freq_down, CPUFREQ_RELATION_L); + } + } + } + } + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + if (avg_rate_mult > 0) + this_dbs_info->avg_rate_mult = (avg_rate_mult * 10) / num_online_cpus(); + else + this_dbs_info->avg_rate_mult = 10; + + return; +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpufreq_nightmare_cpuinfo *dbs_info = + container_of(work, struct cpufreq_nightmare_cpuinfo, work.work); + unsigned int cpu = dbs_info->cpu; + int delay; + 
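+ /*
+ * Each sample: take the per-CPU timer_mutex, evaluate load and hotplug
+ * state in dbs_check_cpu(), then re-arm the delayed work. The delay is
+ * sampling_rate scaled by avg_rate_mult/10, where avg_rate_mult holds ten
+ * times the average of the per-CPU sampling_up/sampling_down factors
+ * computed in dbs_check_cpu() and is treated as at least 10, i.e. the
+ * effective period never drops below one sampling_rate interval.
+ */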
+ mutex_lock(&dbs_info->timer_mutex); + + dbs_check_cpu(dbs_info); + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies((dbs_tuners_ins.sampling_rate * (dbs_info->avg_rate_mult < 10 ? 10 : dbs_info->avg_rate_mult)) / 10); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + queue_delayed_work_on(cpu, dvfs_workqueues, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpufreq_nightmare_cpuinfo *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(DEF_START_DELAY * 1000 * 1000 + + dbs_tuners_ins.sampling_rate); + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + INIT_WORK(&dbs_info->up_work, cpu_up_work); + INIT_WORK(&dbs_info->down_work, cpu_down_work); + + queue_delayed_work_on(dbs_info->cpu, dvfs_workqueues, + &dbs_info->work, delay + 2 * HZ); +} + +static inline void dbs_timer_exit(struct cpufreq_nightmare_cpuinfo *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); + cancel_work_sync(&dbs_info->up_work); + cancel_work_sync(&dbs_info->down_work); +} + +static int reboot_notifier_call(struct notifier_block *this, + unsigned long code, void *_cmd) +{ + atomic_set(&g_hotplug_lock, 1); + return NOTIFY_DONE; +} + +static struct notifier_block reboot_notifier = { + .notifier_call = reboot_notifier_call, +}; + +#ifdef CONFIG_HAS_EARLYSUSPEND +static struct early_suspend early_suspend; +unsigned int previous_freq_step; +unsigned int previous_sampling_rate; +static void cpufreq_nightmare_early_suspend(struct early_suspend *h) +{ +#if EARLYSUSPEND_HOTPLUGLOCK + dbs_tuners_ins.early_suspend = + atomic_read(&g_hotplug_lock); +#endif + previous_freq_step = dbs_tuners_ins.freq_step; + previous_sampling_rate = dbs_tuners_ins.sampling_rate; + dbs_tuners_ins.freq_step = 10; + dbs_tuners_ins.sampling_rate = 200000; +#if EARLYSUSPEND_HOTPLUGLOCK + atomic_set(&g_hotplug_lock, + (dbs_tuners_ins.min_cpu_lock) ? 
dbs_tuners_ins.min_cpu_lock : 1); + apply_hotplug_lock(); + stop_rq_work(); +#endif +} +static void cpufreq_nightmare_late_resume(struct early_suspend *h) +{ +#if EARLYSUSPEND_HOTPLUGLOCK + atomic_set(&g_hotplug_lock, dbs_tuners_ins.early_suspend); +#endif + dbs_tuners_ins.early_suspend = -1; + dbs_tuners_ins.freq_step = previous_freq_step; + dbs_tuners_ins.sampling_rate = previous_sampling_rate; +#if EARLYSUSPEND_HOTPLUGLOCK + apply_hotplug_lock(); + start_rq_work(); +#endif +} +#endif + +static int cpufreq_governor_nightmare(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpufreq_nightmare_cpuinfo *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + dbs_tuners_ins.max_freq = policy->max; + dbs_tuners_ins.min_freq = policy->min; + hotplug_histories->num_hist = 0; + start_rq_work(); + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpufreq_nightmare_cpuinfo *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + j_dbs_info->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + this_dbs_info->cpu = cpu; + this_dbs_info->avg_rate_mult = 20; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + min_sampling_rate = MIN_SAMPLING_RATE; + dbs_tuners_ins.sampling_rate = DEF_SAMPLING_RATE; + dbs_tuners_ins.io_is_busy = 0; + } + mutex_unlock(&dbs_mutex); + + register_reboot_notifier(&reboot_notifier); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + +#if !EARLYSUSPEND_HOTPLUGLOCK + register_pm_notifier(&pm_notifier); +#endif +#ifdef CONFIG_HAS_EARLYSUSPEND + register_early_suspend(&early_suspend); +#endif + break; + + case CPUFREQ_GOV_STOP: +#ifdef CONFIG_HAS_EARLYSUSPEND + unregister_early_suspend(&early_suspend); +#endif +#if !EARLYSUSPEND_HOTPLUGLOCK + unregister_pm_notifier(&pm_notifier); +#endif + + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + + unregister_reboot_notifier(&reboot_notifier); + + dbs_enable--; + mutex_unlock(&dbs_mutex); + + stop_rq_work(); + + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, + CPUFREQ_RELATION_L); + + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_nightmare_init(void) +{ + int ret; + + ret = init_rq_avg(); + if (ret) + return ret; + + hotplug_histories = kzalloc(sizeof(struct cpu_usage_history), GFP_KERNEL); + if (!hotplug_histories) { + pr_err("%s cannot create hotplug history array\n", __func__); + ret = -ENOMEM; + goto err_hist; + } + + dvfs_workqueues = create_workqueue("knightmare"); + if (!dvfs_workqueues) { + pr_err("%s cannot create 
workqueue\n", __func__); + ret = -ENOMEM; + goto err_queue; + } + + ret = cpufreq_register_governor(&cpufreq_gov_nightmare); + if (ret) + goto err_reg; + +#ifdef CONFIG_HAS_EARLYSUSPEND + early_suspend.level = EARLY_SUSPEND_LEVEL_DISABLE_FB; + early_suspend.suspend = cpufreq_nightmare_early_suspend; + early_suspend.resume = cpufreq_nightmare_late_resume; +#endif + + return ret; + +err_reg: + destroy_workqueue(dvfs_workqueues); +err_queue: + kfree(hotplug_histories); +err_hist: + kfree(rq_data); + return ret; +} + +static void __exit cpufreq_gov_nightmare_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_nightmare); + destroy_workqueue(dvfs_workqueues); + kfree(hotplug_histories); + kfree(rq_data); +} + +MODULE_AUTHOR("ByungChang Cha "); +MODULE_DESCRIPTION("'cpufreq_nightmare' - A dynamic cpufreq/cpuhotplug governor"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_NIGHTMARE +fs_initcall(cpufreq_gov_nightmare_init); +#else +module_init(cpufreq_gov_nightmare_init); +#endif +module_exit(cpufreq_gov_nightmare_exit); diff --git a/drivers/cpufreq/cpufreq_slp.c b/drivers/cpufreq/cpufreq_slp.c new file mode 100644 index 00000000..bb1b00cf --- /dev/null +++ b/drivers/cpufreq/cpufreq_slp.c @@ -0,0 +1,1438 @@ +/* + * drivers/cpufreq/cpufreq_pegasusq.c + * + * Copyright (C) 2011 Samsung Electronics co. ltd + * ByungChang Cha + * + * Based on ondemand governor + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HAS_EARLYSUSPEND +#include +#endif +#define EARLYSUSPEND_HOTPLUGLOCK 1 + +/* + * runqueue average + */ + +#define RQ_AVG_TIMER_RATE 10 + +extern void apenable_auto_hotplug(bool state); +extern bool apget_enable_auto_hotplug(void); +static bool prev_apenable; + +struct runqueue_data { + unsigned int nr_run_avg; + unsigned int update_rate; + int64_t last_time; + int64_t total_time; + struct delayed_work work; + struct workqueue_struct *nr_run_wq; + spinlock_t lock; +}; + +static struct runqueue_data *rq_data; +static void rq_work_fn(struct work_struct *work); + +static void start_rq_work(void) +{ + rq_data->nr_run_avg = 0; + rq_data->last_time = 0; + rq_data->total_time = 0; + if (rq_data->nr_run_wq == NULL) + rq_data->nr_run_wq = + create_singlethread_workqueue("nr_run_avg"); + + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + return; +} + +static void stop_rq_work(void) +{ + if (rq_data->nr_run_wq) + cancel_delayed_work(&rq_data->work); + return; +} + +static int __init init_rq_avg(void) +{ + rq_data = kzalloc(sizeof(struct runqueue_data), GFP_KERNEL); + if (rq_data == NULL) { + pr_err("%s cannot allocate memory\n", __func__); + return -ENOMEM; + } + spin_lock_init(&rq_data->lock); + rq_data->update_rate = RQ_AVG_TIMER_RATE; + INIT_DELAYED_WORK_DEFERRABLE(&rq_data->work, rq_work_fn); + + return 0; +} + +static void rq_work_fn(struct work_struct *work) +{ + int64_t time_diff = 0; + int64_t nr_run = 0; + unsigned long flags = 0; + int64_t cur_time = ktime_to_ns(ktime_get()); + + spin_lock_irqsave(&rq_data->lock, flags); + + if (rq_data->last_time == 0) + rq_data->last_time = cur_time; + if (rq_data->nr_run_avg == 0) + 
rq_data->total_time = 0; + + nr_run = nr_running() * 100; + time_diff = cur_time - rq_data->last_time; + do_div(time_diff, 1000 * 1000); + + if (time_diff != 0 && rq_data->total_time != 0) { + nr_run = (nr_run * time_diff) + + (rq_data->nr_run_avg * rq_data->total_time); + do_div(nr_run, rq_data->total_time + time_diff); + } + rq_data->nr_run_avg = nr_run; + rq_data->total_time += time_diff; + rq_data->last_time = cur_time; + + if (rq_data->update_rate != 0) + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + + spin_unlock_irqrestore(&rq_data->lock, flags); +} + +static unsigned int get_nr_run_avg(void) +{ + unsigned int nr_run_avg; + unsigned long flags = 0; + + spin_lock_irqsave(&rq_data->lock, flags); + nr_run_avg = rq_data->nr_run_avg; + rq_data->nr_run_avg = 0; + spin_unlock_irqrestore(&rq_data->lock, flags); + + return nr_run_avg; +} + + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_SAMPLING_DOWN_FACTOR (2) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define DEF_FREQUENCY_UP_THRESHOLD (85) +#define DEF_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (50000) +#define MIN_SAMPLING_RATE (10000) +#define MAX_HOTPLUG_RATE (40u) + +#define DEF_MAX_CPU_LOCK (0) +#define DEF_MIN_CPU_LOCK (0) +#define DEF_CPU_UP_FREQ (500000) +#define DEF_CPU_DOWN_FREQ (200000) +#define DEF_UP_NR_CPUS (1) +#define DEF_CPU_UP_RATE (10) +#define DEF_CPU_DOWN_RATE (20) +#define DEF_FREQ_STEP (40) +#define DEF_START_DELAY (0) + +#define UP_THRESHOLD_AT_MIN_FREQ (40) +#define FREQ_FOR_RESPONSIVENESS (500000) + +#define HOTPLUG_DOWN_INDEX (0) +#define HOTPLUG_UP_INDEX (1) + +#ifdef CONFIG_MACH_MIDAS +static int hotplug_rq[4][2] = { + {0, 100}, {100, 200}, {200, 300}, {300, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 500000}, + {200000, 500000}, + {200000, 500000}, + {200000, 0} +}; +#else +static int hotplug_rq[4][2] = { + {0, 100}, {100, 200}, {200, 300}, {300, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 500000}, + {200000, 500000}, + {200000, 500000}, + {200000, 0} +}; +#endif + +static unsigned int min_sampling_rate; + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ +static +#endif +struct cpufreq_governor cpufreq_gov_pegasusq = { + .name = "slp", + .governor = cpufreq_governor_dbs, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct work_struct up_work; + struct work_struct down_work; + struct cpufreq_frequency_table *freq_table; + unsigned int rate_mult; + int cpu; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. 
+ */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); + +static struct workqueue_struct *dvfs_workqueue; + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. + */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int io_is_busy; + /* pegasusq tuners */ + unsigned int freq_step; + unsigned int cpu_up_rate; + unsigned int cpu_down_rate; + unsigned int cpu_up_freq; + unsigned int cpu_down_freq; + unsigned int up_nr_cpus; + unsigned int max_cpu_lock; + unsigned int min_cpu_lock; + atomic_t hotplug_lock; + unsigned int dvfs_debug; + unsigned int max_freq; + unsigned int min_freq; +#ifdef CONFIG_HAS_EARLYSUSPEND + int early_suspend; +#endif +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .freq_step = DEF_FREQ_STEP, + .cpu_up_rate = DEF_CPU_UP_RATE, + .cpu_down_rate = DEF_CPU_DOWN_RATE, + .cpu_up_freq = DEF_CPU_UP_FREQ, + .cpu_down_freq = DEF_CPU_DOWN_FREQ, + .up_nr_cpus = DEF_UP_NR_CPUS, + .max_cpu_lock = DEF_MAX_CPU_LOCK, + .min_cpu_lock = DEF_MIN_CPU_LOCK, + .hotplug_lock = ATOMIC_INIT(0), + .dvfs_debug = 0, +#ifdef CONFIG_HAS_EARLYSUSPEND + .early_suspend = -1, +#endif +}; + + +/* + * CPU hotplug lock interface + */ + +static atomic_t g_hotplug_count = ATOMIC_INIT(0); +static atomic_t g_hotplug_lock = ATOMIC_INIT(0); + +static void apply_hotplug_lock(void) +{ + int online, possible, lock, flag; + struct work_struct *work; + struct cpu_dbs_info_s *dbs_info; + + /* do turn_on/off cpus */ + dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */ + online = num_online_cpus(); + possible = num_possible_cpus(); + lock = atomic_read(&g_hotplug_lock); + flag = lock - online; + + if (flag == 0) + return; + + work = flag > 0 ? 
&dbs_info->up_work : &dbs_info->down_work; + + pr_debug("%s online %d possible %d lock %d flag %d %d\n", + __func__, online, possible, lock, flag, (int)abs(flag)); + + queue_work_on(dbs_info->cpu, dvfs_workqueue, work); +} + +static int cpufreq_pegasusq_cpu_lock(int num_core) +{ + int prev_lock; + + if (num_core < 1 || num_core > num_possible_cpus()) + return -EINVAL; + + prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock != 0 && prev_lock < num_core) + return -EINVAL; + else if (prev_lock == num_core) + atomic_inc(&g_hotplug_count); + + atomic_set(&g_hotplug_lock, num_core); + atomic_set(&g_hotplug_count, 1); + apply_hotplug_lock(); + + return 0; +} + +static int cpufreq_pegasusq_cpu_unlock(int num_core) +{ + int prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock < num_core) + return 0; + else if (prev_lock == num_core) + atomic_dec(&g_hotplug_count); + + if (atomic_read(&g_hotplug_count) == 0) + atomic_set(&g_hotplug_lock, 0); + + return 0; +} + + +/* + * History of CPU usage + */ +struct cpu_usage { + unsigned int freq; + unsigned int load[NR_CPUS]; + unsigned int rq_avg; +}; + +struct cpu_usage_history { + struct cpu_usage usage[MAX_HOTPLUG_RATE]; + unsigned int num_hist; +}; + +static struct cpu_usage_history *hotplug_history; + +static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, + u64 *wall) +{ + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + + idle_time = cur_wall_time - busy_time; + if (wall) + *wall = jiffies_to_usecs(cur_wall_time); + + return jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, + cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_pegasusq Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); +show_one(ignore_nice_load, ignore_nice); +show_one(down_differential, down_differential); +show_one(freq_step, freq_step); +show_one(cpu_up_rate, cpu_up_rate); +show_one(cpu_down_rate, cpu_down_rate); +show_one(cpu_up_freq, cpu_up_freq); +show_one(cpu_down_freq, cpu_down_freq); +show_one(up_nr_cpus, up_nr_cpus); +show_one(max_cpu_lock, max_cpu_lock); +show_one(min_cpu_lock, min_cpu_lock); +show_one(dvfs_debug, dvfs_debug); +static ssize_t show_hotplug_lock(struct 
kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", atomic_read(&g_hotplug_lock)); +} + +#define show_hotplug_param(file_name, num_core, up_down) \ +static ssize_t show_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", file_name[num_core - 1][up_down]); \ +} + +#define store_hotplug_param(file_name, num_core, up_down) \ +static ssize_t store_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, \ + const char *buf, size_t count) \ +{ \ + unsigned int input; \ + int ret; \ + ret = sscanf(buf, "%u", &input); \ + if (ret != 1) \ + return -EINVAL; \ + file_name[num_core - 1][up_down] = input; \ + return count; \ +} + +show_hotplug_param(hotplug_freq, 1, 1); +show_hotplug_param(hotplug_freq, 2, 0); +show_hotplug_param(hotplug_freq, 2, 1); +show_hotplug_param(hotplug_freq, 3, 0); +show_hotplug_param(hotplug_freq, 3, 1); +show_hotplug_param(hotplug_freq, 4, 0); + +show_hotplug_param(hotplug_rq, 1, 1); +show_hotplug_param(hotplug_rq, 2, 0); +show_hotplug_param(hotplug_rq, 2, 1); +show_hotplug_param(hotplug_rq, 3, 0); +show_hotplug_param(hotplug_rq, 3, 1); +show_hotplug_param(hotplug_rq, 4, 0); + +store_hotplug_param(hotplug_freq, 1, 1); +store_hotplug_param(hotplug_freq, 2, 0); +store_hotplug_param(hotplug_freq, 2, 1); +store_hotplug_param(hotplug_freq, 3, 0); +store_hotplug_param(hotplug_freq, 3, 1); +store_hotplug_param(hotplug_freq, 4, 0); + +store_hotplug_param(hotplug_rq, 1, 1); +store_hotplug_param(hotplug_rq, 2, 0); +store_hotplug_param(hotplug_rq, 2, 1); +store_hotplug_param(hotplug_rq, 3, 0); +store_hotplug_param(hotplug_rq, 3, 1); +store_hotplug_param(hotplug_rq, 4, 0); + +define_one_global_rw(hotplug_freq_1_1); +define_one_global_rw(hotplug_freq_2_0); +define_one_global_rw(hotplug_freq_2_1); +define_one_global_rw(hotplug_freq_3_0); +define_one_global_rw(hotplug_freq_3_1); +define_one_global_rw(hotplug_freq_4_0); + +define_one_global_rw(hotplug_rq_1_1); +define_one_global_rw(hotplug_rq_2_0); +define_one_global_rw(hotplug_rq_2_1); +define_one_global_rw(hotplug_rq_3_0); +define_one_global_rw(hotplug_rq_3_1); +define_one_global_rw(hotplug_rq_4_0); + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active 
*/ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = + get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + return count; +} + +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.down_differential = min(input, 100u); + return count; +} + +static ssize_t store_freq_step(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_step = min(input, 100u); + return count; +} + +static ssize_t store_cpu_up_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_up_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + +static ssize_t store_cpu_down_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_down_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + +static ssize_t store_cpu_up_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_up_freq = min(input, dbs_tuners_ins.max_freq); + return count; +} + +static ssize_t store_cpu_down_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_down_freq = max(input, dbs_tuners_ins.min_freq); + return count; +} + +static ssize_t store_up_nr_cpus(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.up_nr_cpus = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_max_cpu_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.max_cpu_lock = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_min_cpu_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.min_cpu_lock = min(input, num_possible_cpus()); 
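The show_*/store_* handlers above and below expose the tuners through the global cpufreq kobject, so with sysfs mounted in the usual place they should surface under /sys/devices/system/cpu/cpufreq/pegasusq/; that path is inferred from the attribute group name, the patch itself never spells it out. A small user-space sketch of poking one of these tunables:

/* Illustrative user-space helper: write one pegasusq tunable.
 * The sysfs path is assumed from the "pegasusq" attribute group name. */
#include <stdio.h>

static int write_tunable(const char *name, unsigned int value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpufreq/pegasusq/%s", name);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%u\n", value);	/* parsed by sscanf(buf, "%u", ...) above */
	fclose(f);
	return 0;
}

int main(void)
{
	/* Example values only; most store handlers validate or clamp the input. */
	write_tunable("up_threshold", 90);
	write_tunable("cpu_up_rate", 15);
	write_tunable("hotplug_freq_2_1", 600000);	/* kHz */
	return 0;
}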
+ return count; +} + +static ssize_t store_hotplug_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + int prev_lock; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + input = min(input, num_possible_cpus()); + prev_lock = atomic_read(&dbs_tuners_ins.hotplug_lock); + + if (prev_lock) + cpufreq_pegasusq_cpu_unlock(prev_lock); + + if (input == 0) { + atomic_set(&dbs_tuners_ins.hotplug_lock, 0); + return count; + } + + ret = cpufreq_pegasusq_cpu_lock(input); + if (ret) { + printk(KERN_ERR "[HOTPLUG] already locked with smaller value %d < %d\n", + atomic_read(&g_hotplug_lock), input); + return ret; + } + + atomic_set(&dbs_tuners_ins.hotplug_lock, input); + + return count; +} + +static ssize_t store_dvfs_debug(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.dvfs_debug = input > 0; + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(down_differential); +define_one_global_rw(freq_step); +define_one_global_rw(cpu_up_rate); +define_one_global_rw(cpu_down_rate); +define_one_global_rw(cpu_up_freq); +define_one_global_rw(cpu_down_freq); +define_one_global_rw(up_nr_cpus); +define_one_global_rw(max_cpu_lock); +define_one_global_rw(min_cpu_lock); +define_one_global_rw(hotplug_lock); +define_one_global_rw(dvfs_debug); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &io_is_busy.attr, + &down_differential.attr, + &freq_step.attr, + &cpu_up_rate.attr, + &cpu_down_rate.attr, + &cpu_up_freq.attr, + &cpu_down_freq.attr, + &up_nr_cpus.attr, + /* priority: hotplug_lock > max_cpu_lock > min_cpu_lock + Exception: hotplug_lock on early_suspend uses min_cpu_lock */ + &max_cpu_lock.attr, + &min_cpu_lock.attr, + &hotplug_lock.attr, + &dvfs_debug.attr, + &hotplug_freq_1_1.attr, + &hotplug_freq_2_0.attr, + &hotplug_freq_2_1.attr, + &hotplug_freq_3_0.attr, + &hotplug_freq_3_1.attr, + &hotplug_freq_4_0.attr, + &hotplug_rq_1_1.attr, + &hotplug_rq_2_0.attr, + &hotplug_rq_2_1.attr, + &hotplug_rq_3_0.attr, + &hotplug_rq_3_1.attr, + &hotplug_rq_4_0.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "pegasusq", +}; + +/************************** sysfs end ************************/ + +static void __cpuinit cpu_up_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_up = dbs_tuners_ins.up_nr_cpus; + int hotplug_lock = atomic_read(&g_hotplug_lock); + if (hotplug_lock) + nr_up = hotplug_lock - online; + + if (online == 1) { + printk(KERN_ERR "CPU_UP 3\n"); + cpu_up(num_possible_cpus() - 1); + nr_up -= 1; + } + + for_each_cpu_not(cpu, cpu_online_mask) { + if (nr_up-- == 0) + break; + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_UP %d\n", cpu); + cpu_up(cpu); + } +} + +static void cpu_down_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_down = 1; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock) + nr_down = online - hotplug_lock; + + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_DOWN %d\n", cpu); + 
cpu_down(cpu); + if (--nr_down == 0) + break; + } +} + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ +#ifndef CONFIG_ARCH_EXYNOS4 + if (p->cur == p->max) + return; +#endif + + __cpufreq_driver_target(p, freq, CPUFREQ_RELATION_L); +} + +/* + * print hotplug debugging info. + * which 1 : UP, 0 : DOWN + */ +static void debug_hotplug_check(int which, int rq_avg, int freq, + struct cpu_usage *usage) +{ + int cpu; + printk(KERN_ERR "CHECK %s rq %d.%02d freq %d [", which ? "up" : "down", + rq_avg / 100, rq_avg % 100, freq); + for_each_online_cpu(cpu) { + printk(KERN_ERR "(%d, %d), ", cpu, usage->load[cpu]); + } + printk(KERN_ERR "]\n"); +} + +static int check_up(void) +{ + int num_hist = hotplug_history->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int i; + int up_rate = dbs_tuners_ins.cpu_up_rate; + int up_freq, up_rq; + int min_freq = INT_MAX; + int min_rq_avg = INT_MAX; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + up_freq = hotplug_freq[online - 1][HOTPLUG_UP_INDEX]; + up_rq = hotplug_rq[online - 1][HOTPLUG_UP_INDEX]; + + if (online == num_possible_cpus()) + return 0; + + if (dbs_tuners_ins.max_cpu_lock != 0 + && online >= dbs_tuners_ins.max_cpu_lock) + return 0; + + if (dbs_tuners_ins.min_cpu_lock != 0 + && online < dbs_tuners_ins.min_cpu_lock) + return 1; + + if (num_hist == 0 || num_hist % up_rate) + return 0; + + for (i = num_hist - 1; i >= num_hist - up_rate; --i) { + usage = &hotplug_history->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + + min_freq = min(min_freq, freq); + min_rq_avg = min(min_rq_avg, rq_avg); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(1, rq_avg, freq, usage); + } + + if (min_freq >= up_freq && min_rq_avg > up_rq) { + printk(KERN_ERR "[HOTPLUG IN] %s %d>=%d && %d>%d\n", + __func__, min_freq, up_freq, min_rq_avg, up_rq); + hotplug_history->num_hist = 0; + return 1; + } + return 0; +} + +static int check_down(void) +{ + int num_hist = hotplug_history->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int i; + int down_rate = dbs_tuners_ins.cpu_down_rate; + int down_freq, down_rq; + int max_freq = 0; + int max_rq_avg = 0; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + down_freq = hotplug_freq[online - 1][HOTPLUG_DOWN_INDEX]; + down_rq = hotplug_rq[online - 1][HOTPLUG_DOWN_INDEX]; + + if (online == 1) + return 0; + + if (dbs_tuners_ins.max_cpu_lock != 0 + && online > dbs_tuners_ins.max_cpu_lock) + return 1; + + if (dbs_tuners_ins.min_cpu_lock != 0 + && online <= dbs_tuners_ins.min_cpu_lock) + return 0; + + if (num_hist == 0 || num_hist % down_rate) + return 0; + + for (i = num_hist - 1; i >= num_hist - down_rate; --i) { + usage = &hotplug_history->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + + max_freq = max(max_freq, freq); + max_rq_avg = max(max_rq_avg, rq_avg); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(0, rq_avg, freq, usage); + } + + if (max_freq <= down_freq && max_rq_avg <= down_rq) { + printk(KERN_ERR "[HOTPLUG OUT] %s %d<=%d && %d<%d\n", + __func__, max_freq, down_freq, max_rq_avg, down_rq); + hotplug_history->num_hist = 0; + return 1; + } + + return 0; +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + + struct cpufreq_policy *policy; + unsigned int j; + int num_hist = hotplug_history->num_hist; + int max_hotplug_rate 
= max(dbs_tuners_ins.cpu_up_rate, + dbs_tuners_ins.cpu_down_rate); + int up_threshold = dbs_tuners_ins.up_threshold; + + policy = this_dbs_info->cur_policy; + + hotplug_history->usage[num_hist].freq = policy->cur; + hotplug_history->usage[num_hist].rq_avg = get_nr_run_avg(); + ++hotplug_history->num_hist; + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + cputime64_t prev_wall_time, prev_idle_time, prev_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + prev_wall_time = j_dbs_info->prev_cpu_wall; + prev_idle_time = j_dbs_info->prev_cpu_idle; + prev_iowait_time = j_dbs_info->prev_cpu_iowait; + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + prev_wall_time); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + prev_idle_time); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + prev_iowait_time); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_dbs_info->prev_cpu_nice; + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + hotplug_history->usage[num_hist].load[j] = load; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; + } + + /* Check for CPU hotplug */ + if (check_up()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueue, + &this_dbs_info->up_work); + } else if (check_down()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueue, + &this_dbs_info->down_work); + } + if (hotplug_history->num_hist == max_hotplug_rate) + hotplug_history->num_hist = 0; + + /* Check for frequency increase */ + if (policy->cur < FREQ_FOR_RESPONSIVENESS) + up_threshold = UP_THRESHOLD_AT_MIN_FREQ; + + if (max_load_freq > up_threshold * policy->cur) { + int inc = (policy->max * dbs_tuners_ins.freq_step) / 100; + int target = min(policy->max, policy->cur + inc); + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max && target == policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + dbs_freq_increase(policy, target); + return; + } + + /* Check for frequency decrease */ +#ifndef CONFIG_ARCH_EXYNOS4 + /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) + return; +#endif + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. 
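The sampling loop above reduces each CPU to one number: load is the busy share of the wall-clock delta, and load_freq = load * freq_avg restates it in frequency terms so the ramp-up test can compare it directly against up_threshold * policy->cur. A stand-alone sketch of that arithmetic, using made-up per-sample deltas:

/* Illustrative sketch of the load bookkeeping in dbs_check_cpu().
 * All inputs are made-up microsecond deltas for one sampling period. */
#include <stdio.h>

struct cpu_sample {
	unsigned int wall_us;	/* wall-clock time elapsed */
	unsigned int idle_us;	/* idle time inside that window */
	unsigned int freq_avg;	/* average frequency, kHz */
};

int main(void)
{
	const struct cpu_sample samples[2] = {
		{ 50000, 10000, 1000000 },	/* CPU0: 80% busy at ~1.0 GHz */
		{ 50000, 40000,  800000 },	/* CPU1: 20% busy at ~0.8 GHz */
	};
	unsigned int up_threshold = 85, cur_freq = 800000;	/* kHz */
	unsigned int max_load_freq = 0;
	int i;

	for (i = 0; i < 2; i++) {
		unsigned int load, load_freq;

		/* same guard as the governor uses before dividing */
		if (!samples[i].wall_us || samples[i].wall_us < samples[i].idle_us)
			continue;

		load = 100 * (samples[i].wall_us - samples[i].idle_us) /
		       samples[i].wall_us;
		load_freq = load * samples[i].freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
		printf("cpu%d: load=%u%% load_freq=%u\n", i, load, load_freq);
	}

	if (max_load_freq > up_threshold * cur_freq)
		printf("ramp up: %u > %u\n", max_load_freq, up_threshold * cur_freq);
	return 0;
}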
To be safe, we focus DOWN_DIFFERENTIAL points under + * the threshold. + */ + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { + unsigned int freq_next; + unsigned int down_thres; + + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + + if (freq_next < policy->min) + freq_next = policy->min; + + + down_thres = UP_THRESHOLD_AT_MIN_FREQ + - dbs_tuners_ins.down_differential; + + if (freq_next < FREQ_FOR_RESPONSIVENESS + && (max_load_freq / freq_next) > down_thres) + freq_next = FREQ_FOR_RESPONSIVENESS; + + if (policy->cur == freq_next) + return; + + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int delay; + + mutex_lock(&dbs_info->timer_mutex); + + dbs_check_cpu(dbs_info); + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + queue_delayed_work_on(cpu, dvfs_workqueue, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(DEF_START_DELAY * 1000 * 1000 + + dbs_tuners_ins.sampling_rate); + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + + queue_delayed_work_on(dbs_info->cpu, dvfs_workqueue, + &dbs_info->work, delay + 2 * HZ); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); + cancel_work_sync(&dbs_info->up_work); + cancel_work_sync(&dbs_info->down_work); +} + +static int reboot_notifier_call(struct notifier_block *this, + unsigned long code, void *_cmd) +{ + atomic_set(&g_hotplug_lock, 1); + return NOTIFY_DONE; +} + +static struct notifier_block reboot_notifier = { + .notifier_call = reboot_notifier_call, +}; + +#ifdef CONFIG_HAS_EARLYSUSPEND +static struct early_suspend early_suspend; +unsigned int prev_freq_step_slp; +unsigned int prev_sampling_rate_slp; +static void cpufreq_pegasusq_early_suspend(struct early_suspend *h) +{ +#if EARLYSUSPEND_HOTPLUGLOCK + dbs_tuners_ins.early_suspend = + atomic_read(&g_hotplug_lock); +#endif + prev_freq_step_slp = dbs_tuners_ins.freq_step; + prev_sampling_rate_slp = dbs_tuners_ins.sampling_rate; + dbs_tuners_ins.freq_step = 20; + dbs_tuners_ins.sampling_rate *= 4; +#if EARLYSUSPEND_HOTPLUGLOCK + atomic_set(&g_hotplug_lock, + (dbs_tuners_ins.min_cpu_lock) ? 
dbs_tuners_ins.min_cpu_lock : 1); + apply_hotplug_lock(); + stop_rq_work(); +#endif +} +static void cpufreq_pegasusq_late_resume(struct early_suspend *h) +{ +#if EARLYSUSPEND_HOTPLUGLOCK + atomic_set(&g_hotplug_lock, dbs_tuners_ins.early_suspend); +#endif + dbs_tuners_ins.early_suspend = -1; + dbs_tuners_ins.freq_step = prev_freq_step_slp; + dbs_tuners_ins.sampling_rate = prev_sampling_rate_slp; +#if EARLYSUSPEND_HOTPLUGLOCK + apply_hotplug_lock(); + start_rq_work(); +#endif +} +#endif + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + prev_apenable = apget_enable_auto_hotplug(); + apenable_auto_hotplug(false); + + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + dbs_tuners_ins.max_freq = policy->max; + dbs_tuners_ins.min_freq = policy->min; + hotplug_history->num_hist = 0; + start_rq_work(); + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + } + this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + min_sampling_rate = MIN_SAMPLING_RATE; + dbs_tuners_ins.sampling_rate = DEF_SAMPLING_RATE; + dbs_tuners_ins.io_is_busy = 0; + } + mutex_unlock(&dbs_mutex); + + register_reboot_notifier(&reboot_notifier); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + +#if !EARLYSUSPEND_HOTPLUGLOCK + register_pm_notifier(&pm_notifier); +#endif +#ifdef CONFIG_HAS_EARLYSUSPEND + register_early_suspend(&early_suspend); +#endif + break; + + case CPUFREQ_GOV_STOP: + apenable_auto_hotplug(prev_apenable); + +#ifdef CONFIG_HAS_EARLYSUSPEND + unregister_early_suspend(&early_suspend); +#endif +#if !EARLYSUSPEND_HOTPLUGLOCK + unregister_pm_notifier(&pm_notifier); +#endif + + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + + unregister_reboot_notifier(&reboot_notifier); + + dbs_enable--; + mutex_unlock(&dbs_mutex); + + stop_rq_work(); + + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, + CPUFREQ_RELATION_L); + + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + int ret; + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 0); + + ret = init_rq_avg(); + if (ret) + return ret; + + INIT_WORK(&dbs_info->up_work, cpu_up_work); + INIT_WORK(&dbs_info->down_work, cpu_down_work); + + hotplug_history = kzalloc(sizeof(struct cpu_usage_history), 
GFP_KERNEL); + if (!hotplug_history) { + pr_err("%s cannot create hotplug history array\n", __func__); + ret = -ENOMEM; + goto err_hist; + } + + dvfs_workqueue = create_workqueue("kpegasusq"); + if (!dvfs_workqueue) { + pr_err("%s cannot create workqueue\n", __func__); + ret = -ENOMEM; + goto err_queue; + } + + ret = cpufreq_register_governor(&cpufreq_gov_pegasusq); + if (ret) + goto err_reg; + +#ifdef CONFIG_HAS_EARLYSUSPEND + early_suspend.level = EARLY_SUSPEND_LEVEL_DISABLE_FB; + early_suspend.suspend = cpufreq_pegasusq_early_suspend; + early_suspend.resume = cpufreq_pegasusq_late_resume; +#endif + + return ret; + +err_reg: + destroy_workqueue(dvfs_workqueue); +err_queue: + kfree(hotplug_history); +err_hist: + kfree(rq_data); + return ret; +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_pegasusq); + destroy_workqueue(dvfs_workqueue); + kfree(hotplug_history); + kfree(rq_data); +} + +MODULE_AUTHOR("ByungChang Cha "); +MODULE_DESCRIPTION("'cpufreq_pegasusq' - A dynamic cpufreq/cpuhotplug governor"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_smartassv2.c b/drivers/cpufreq/cpufreq_smartassH3.c similarity index 97% rename from drivers/cpufreq/cpufreq_smartassv2.c rename to drivers/cpufreq/cpufreq_smartassH3.c index 3b51a788..7e0891ed 100644 --- a/drivers/cpufreq/cpufreq_smartassv2.c +++ b/drivers/cpufreq/cpufreq_smartassH3.c @@ -1,5 +1,5 @@ /* - * drivers/cpufreq/cpufreq_smartassv2.c + * drivers/cpufreq/cpufreq_smartassH3.c * * Copyright (C) 2010 Google, Inc. * @@ -162,15 +162,15 @@ enum { */ static unsigned long debug_mask; -static int cpufreq_governor_smartassv2(struct cpufreq_policy *policy, +static int cpufreq_governor_smartass_h3(struct cpufreq_policy *policy, unsigned int event); -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SMARTASSV2 +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SMARTASSH3 static #endif -struct cpufreq_governor cpufreq_gov_smartassv2 = { - .name = "smartassv2", - .governor = cpufreq_governor_smartassv2, +struct cpufreq_governor cpufreq_gov_smartass_h3 = { + .name = "smartassH3", + .governor = cpufreq_governor_smartass_h3, .max_transition_latency = 9000000, .owner = THIS_MODULE, }; @@ -692,10 +692,10 @@ static struct attribute * smartass_attributes[] = { static struct attribute_group smartass_attr_group = { .attrs = smartass_attributes, - .name = "smartassv2", + .name = "smartassH3", }; -static int cpufreq_governor_smartassv2(struct cpufreq_policy *new_policy, +static int cpufreq_governor_smartass_h3(struct cpufreq_policy *new_policy, unsigned int event) { unsigned int cpu = new_policy->cpu; @@ -880,10 +880,10 @@ static int __init cpufreq_smartass_init(void) register_early_suspend(&smartass_power_suspend); - return cpufreq_register_governor(&cpufreq_gov_smartassv2); + return cpufreq_register_governor(&cpufreq_gov_smartass_h3); } -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SMARTASSV2 +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SMARTASSH3 fs_initcall(cpufreq_smartass_init); #else module_init(cpufreq_smartass_init); @@ -891,7 +891,7 @@ module_init(cpufreq_smartass_init); static void __exit cpufreq_smartass_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_smartassv2); + cpufreq_unregister_governor(&cpufreq_gov_smartass_h3); destroy_workqueue(up_wq); destroy_workqueue(down_wq); } @@ -899,7 +899,6 @@ static void __exit cpufreq_smartass_exit(void) 
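cpufreq_gov_dbs_init() above uses the usual reverse-order goto unwinding: each acquisition that can fail jumps to a label that releases only what was set up before it (the rq_data statistics, the history buffer, the workqueue, then the governor registration). A minimal user-space sketch of the same pattern, with malloc() standing in for the real resources:

/* Minimal sketch of the goto-unwind pattern used in cpufreq_gov_dbs_init().
 * Plain malloc()s stand in for rq_data, hotplug_history and the workqueue. */
#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	void *stats, *history, *queue;
	int ret = 0;

	stats = malloc(64);		/* plays the role of rq_data */
	if (!stats)
		return -1;

	history = malloc(64);		/* plays the role of hotplug_history */
	if (!history) {
		ret = -1;
		goto err_hist;
	}

	queue = malloc(64);		/* plays the role of the workqueue */
	if (!queue) {
		ret = -1;
		goto err_queue;
	}

	printf("all resources acquired\n");
	free(queue);			/* normal teardown for the demo */
	free(history);
	free(stats);
	return 0;

err_queue:
	free(history);			/* undo in reverse order of acquisition */
err_hist:
	free(stats);
	return ret;
}

int main(void)
{
	return setup() ? 1 : 0;
}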
module_exit(cpufreq_smartass_exit); MODULE_AUTHOR ("Erasmux, moded by H3ROS & C3C0"); -MODULE_DESCRIPTION ("'cpufreq_smartassv2' - A smart cpufreq governor"); +MODULE_DESCRIPTION ("'cpufreq_smartassH3' - A smart cpufreq governor"); MODULE_LICENSE ("GPL"); - diff --git a/drivers/cpufreq/cpufreq_wheatley.c b/drivers/cpufreq/cpufreq_wheatley.c new file mode 100644 index 00000000..a020121f --- /dev/null +++ b/drivers/cpufreq/cpufreq_wheatley.c @@ -0,0 +1,839 @@ +/* + * drivers/cpufreq/cpufreq_wheatley.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * (C) 2012 Ezekeel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (50000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_TARGET_RESIDENCY (10000) +#define DEF_ALLOWED_MISSES (5) + +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. + * All times here are in uS. + */ +#define MIN_SAMPLING_RATE_RATIO (2) + +static unsigned int min_sampling_rate, num_misses; + +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (20) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_WHEATLEY +static +#endif +struct cpufreq_governor cpufreq_gov_wheatley = { + .name = "wheatley", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + int cpu; + unsigned int sample_type:1; + unsigned long long prev_idletime; + unsigned long long prev_idleusage; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. 
+ */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); + +DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. + */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int powersave_bias; + unsigned int io_is_busy; + unsigned int target_residency; + unsigned int allowed_misses; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .powersave_bias = 0, + .target_residency = DEF_TARGET_RESIDENCY, + .allowed_misses = DEF_ALLOWED_MISSES, +}; + +static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, + u64 *wall) +{ + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + + idle_time = cur_wall_time - busy_time; + if (wall) + *wall = jiffies_to_usecs(cur_wall_time); + + return jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 
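powersave_bias_target(), defined just below, trims the requested frequency by powersave_bias/1000 and, since the trimmed value rarely sits exactly on a table entry, splits the sampling window between the neighbouring table frequencies so that the average over the window matches the trimmed target. A worked version of that arithmetic with an invented two-entry table:

/* Illustrative arithmetic behind powersave_bias_target(): split one sampling
 * window between freq_lo and freq_hi so the average lands on freq_avg.
 * The frequencies and window length below are made up. */
#include <stdio.h>

int main(void)
{
	unsigned int freq_req = 1400000;	/* requested target, kHz */
	unsigned int powersave_bias = 100;	/* 100/1000 = 10% reduction */
	unsigned int freq_lo = 1200000;		/* nearest table entry below */
	unsigned int freq_hi = 1400000;		/* nearest table entry above */
	unsigned int jiffies_total = 10;	/* one sampling period */
	unsigned int freq_avg, jiffies_hi, jiffies_lo;

	freq_avg = freq_req - freq_req * powersave_bias / 1000;	/* 1260000 */

	/* same rounding as the governor: add (hi - lo) / 2 before dividing */
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += (freq_hi - freq_lo) / 2;
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;

	printf("run %u jiffies at %u kHz, %u jiffies at %u kHz (avg ~%u kHz)\n",
	       jiffies_hi, freq_hi, jiffies_lo, freq_lo, freq_avg);
	return 0;
}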
+ */ +static unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, + unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + policy->cpu); + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void wheatley_powersave_bias_init_cpu(int cpu) +{ + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + +static void wheatley_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + wheatley_powersave_bias_init_cpu(i); + } +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_wheatley Governor Tunables */ +#define show_one(file_name, object) \ + static ssize_t show_##file_name \ + (struct kobject *kobj, struct attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ + } +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); +show_one(ignore_nice_load, ignore_nice); +show_one(powersave_bias, powersave_bias); +show_one(target_residency, target_residency); +show_one(allowed_misses, allowed_misses); + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, 
size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + } + return count; +} + +static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + dbs_tuners_ins.powersave_bias = input; + wheatley_powersave_bias_init(); + return count; +} + +static ssize_t store_target_residency(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.target_residency = input; + return count; +} + +static ssize_t store_allowed_misses(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.allowed_misses = input; + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(powersave_bias); +define_one_global_rw(target_residency); +define_one_global_rw(allowed_misses); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, + &target_residency.attr, + &allowed_misses.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "wheatley", +}; + +/************************** sysfs end ************************/ + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ + if (dbs_tuners_ins.powersave_bias) + freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); + else if (p->cur == p->max) + return; + + __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? 
+ CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + + struct cpufreq_policy *policy; + unsigned int j; + + unsigned long total_idletime, total_usage; + + this_dbs_info->freq_lo = 0; + policy = this_dbs_info->cur_policy; + + /* + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over + * 30%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of + * 5% (default) of current frequency + */ + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + total_idletime = 0; + total_usage = 0; + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + struct cpuidle_device * j_cpuidle_dev = NULL; +// struct cpuidle_state * deepidle_state = NULL; +// unsigned long long deepidle_time, deepidle_usage; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) (cur_wall_time - j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) (cur_idle_time - j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) (cur_iowait_time - j_dbs_info->prev_cpu_iowait); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_dbs_info->prev_cpu_nice; + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + /* + * For the purpose of wheatley, waiting for disk IO is an + * indication that you're performance critical, and not that + * the system is actually idle. So subtract the iowait time + * from the cpu idle time. 
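When io_is_busy is set, the check just below subtracts the iowait delta from the idle delta, so time spent waiting on I/O counts as busy and pushes the computed load (and therefore the chosen frequency) up. A small sketch of the difference, with invented per-sample deltas:

/* Illustrative effect of io_is_busy on the load calculation.
 * The wall/idle/iowait values are made-up microsecond deltas. */
#include <stdio.h>

static unsigned int calc_load(unsigned int wall, unsigned int idle,
			      unsigned int iowait, int io_is_busy)
{
	if (io_is_busy && idle >= iowait)
		idle -= iowait;		/* count iowait as busy time */
	if (!wall || wall < idle)
		return 0;
	return 100 * (wall - idle) / wall;
}

int main(void)
{
	unsigned int wall = 50000, idle = 30000, iowait = 20000;

	printf("io_is_busy=0 -> load %u%%\n", calc_load(wall, idle, iowait, 0));
	printf("io_is_busy=1 -> load %u%%\n", calc_load(wall, idle, iowait, 1));
	return 0;
}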
+ */ + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; + + j_cpuidle_dev = per_cpu(cpuidle_devices, j); + +/* + if (j_cpuidle_dev) + deepidle_state = &j_cpuidle_dev->states[j_cpuidle_dev->state_count - 1]; + + if (deepidle_state) { + deepidle_time = deepidle_state->time; + deepidle_usage = deepidle_state->usage; + + total_idletime += (unsigned long)(deepidle_time - j_dbs_info->prev_idletime); + total_usage += (unsigned long)(deepidle_usage - j_dbs_info->prev_idleusage); + + j_dbs_info->prev_idletime = deepidle_time; + j_dbs_info->prev_idleusage = deepidle_usage; + } +*/ + } + + if (total_usage > 0 && total_idletime / total_usage >= dbs_tuners_ins.target_residency) { + if (num_misses > 0) + num_misses--; + } else { + if (num_misses <= dbs_tuners_ins.allowed_misses) + num_misses++; + } + + /* Check for frequency increase */ + if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur + || num_misses <= dbs_tuners_ins.allowed_misses) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + dbs_freq_increase(policy, policy->max); + return; + } + + /* Check for frequency decrease */ + /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) + return; + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. 
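Put differently, the code below picks the lowest frequency at which the measured demand would still sit just under (up_threshold - down_differential) percent, so the next sample does not immediately bounce back up. A worked example of that formula with invented numbers; the real code then clamps to policy->min and lets CPUFREQ_RELATION_L snap the result onto an actual table frequency:

/* Worked example of the scale-down target used just below:
 * freq_next = max_load_freq / (up_threshold - down_differential).
 * The load and frequency figures are invented. */
#include <stdio.h>

int main(void)
{
	unsigned int up_threshold = 95, down_differential = 3;	/* wheatley micro defaults */
	unsigned int cur_freq = 1400000;			/* kHz */
	unsigned int load = 40;					/* percent */
	unsigned int max_load_freq = load * cur_freq;		/* 56,000,000 */
	unsigned int freq_next;

	if (max_load_freq < (up_threshold - down_differential) * cur_freq) {
		freq_next = max_load_freq / (up_threshold - down_differential);
		printf("scale down: %u kHz -> %u kHz\n", cur_freq, freq_next);
		/* at freq_next the same work loads the CPU to roughly
		 * up_threshold - down_differential percent */
		printf("projected load: ~%u%%\n", max_load_freq / freq_next);
	}
	return 0;
}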
+ */ + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { + unsigned int freq_next; + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + + if (freq_next < policy->min) + freq_next = policy->min; + + if (!dbs_tuners_ins.powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int sample_type = dbs_info->sample_type; + + int delay; + + mutex_lock(&dbs_info->timer_mutex); + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + if (!dbs_tuners_ins.powersave_bias || + sample_type == DBS_NORMAL_SAMPLE) { + dbs_check_cpu(dbs_info); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = DBS_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } else { + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = dbs_info->freq_lo_jiffies; + } + schedule_delayed_work_on(cpu, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); +} + +/* + * Not all CPUs want IO time to be accounted as busy; this depends on how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (android.com) claims this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) and later have an efficient idle.
+ */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return 0; +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + } + this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; + wheatley_powersave_bias_init_cpu(cpu); + num_misses = 0; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + /* policy latency is in nS. Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); + dbs_tuners_ins.io_is_busy = should_io_be_busy(); + } + mutex_unlock(&dbs_mutex); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + break; + + case CPUFREQ_GOV_STOP: + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + dbs_enable--; + mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + u64 idle_time; + int cpu = get_cpu(); + + idle_time = get_cpu_idle_time_us(cpu, NULL); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_tuners_ins.down_differential = + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In no_hz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. 
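Wheatley derives its sampling interval from the driver's transition latency: the default is latency * LATENCY_MULTIPLIER, floored by min_sampling_rate, which is itself raised to at least MIN_LATENCY_MULTIPLIER times the latency. A worked example assuming a hypothetical 50 us transition latency and the micro-accounting floor set in the init path below:

/* Worked example of wheatley's sampling-rate derivation (all times in us).
 * The 50 us transition latency is a hypothetical value. */
#include <stdio.h>

#define LATENCY_MULTIPLIER		(1000)
#define MIN_LATENCY_MULTIPLIER		(20)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE	(50000)

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned int transition_latency_ns = 50 * 1000;	/* hypothetical: 50 us */
	unsigned int latency = transition_latency_ns / 1000;
	unsigned int min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	unsigned int sampling_rate;

	if (latency == 0)
		latency = 1;

	/* bring kernel and hardware constraints together, as in GOV_START */
	min_sampling_rate = max_u(min_sampling_rate,
				  MIN_LATENCY_MULTIPLIER * latency);	/* 50000 vs 1000 */
	sampling_rate = max_u(min_sampling_rate,
			      latency * LATENCY_MULTIPLIER);		/* 50000 vs 50000 */

	printf("min_sampling_rate=%u us, sampling_rate=%u us\n",
	       min_sampling_rate, sampling_rate);
	return 0;
}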
+ */ + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + } + + return cpufreq_register_governor(&cpufreq_gov_wheatley); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_wheatley); +} + + +MODULE_AUTHOR("Venkatesh Pallipadi "); +MODULE_AUTHOR("Alexey Starikovskiy "); +MODULE_AUTHOR("Ezekeel "); +MODULE_DESCRIPTION("'cpufreq_wheatley' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_WHEATLEY +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index eb3defb3..d814401b 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -15,6 +15,7 @@ typedef u64 __nocast cputime64_t; #define cputime64_to_jiffies64(__ct) (__force u64)(__ct) #define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) +#define cputime64_sub(__a, __b) ((__a) - (__b)) #define nsecs_to_cputime64(__ct) \ jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 5315db0a..049f880b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -107,8 +107,8 @@ xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par) struct udphdr _hdr, *hp = NULL; struct sock *sk; __be32 daddr, saddr; - __be16 dport, sport; - u8 protocol; + __be16 dport, sport = 0; + u8 protocol = 0; #ifdef XT_SOCKET_HAVE_CONNTRACK struct nf_conn const *ct; enum ip_conntrack_info ctinfo; @@ -265,8 +265,8 @@ xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk; - struct in6_addr *daddr, *saddr; - __be16 dport, sport; + struct in6_addr *daddr = NULL, *saddr = NULL; + __be16 dport = 0, sport = 0; int thoff, tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
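Taking the cpufreq side of the patch as a whole: once built in, any of the added governors is selected per policy through the standard cpufreq sysfs interface, as sketched below from user space. The governor name and CPU number are examples; the authoritative list comes from scaling_available_governors.

/* Illustrative user-space snippet: select a governor for cpu0 via sysfs.
 * "wheatley" is only an example name; check scaling_available_governors. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor";
	const char *governor = "wheatley";
	char cur_name[64] = "";
	FILE *f;

	f = fopen(path, "r");
	if (f) {
		if (fgets(cur_name, sizeof(cur_name), f))
			printf("current governor: %s", cur_name);
		fclose(f);
	}

	f = fopen(path, "w");		/* needs root */
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "%s\n", governor);
	fclose(f);
	printf("requested governor: %s\n", governor);
	return 0;
}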